Diffstat (limited to 'arch')
483 files changed, 27961 insertions, 16225 deletions
diff --git a/arch/arm/boot/dts/am335x-guardian.dts b/arch/arm/boot/dts/am335x-guardian.dts index 81e0f63e94d3..0ebe9e2c150e 100644 --- a/arch/arm/boot/dts/am335x-guardian.dts +++ b/arch/arm/boot/dts/am335x-guardian.dts @@ -105,6 +105,7 @@ ti,timers = <&timer7>; pinctrl-names = "default"; pinctrl-0 = <&dmtimer7_pins>; + ti,clock-source = <0x01>; }; vmmcsd_fixed: regulator-3v3 { diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts index a1fd3e63e86e..92466b9eb6ba 100644 --- a/arch/arm/boot/dts/am3517-evm.dts +++ b/arch/arm/boot/dts/am3517-evm.dts @@ -156,6 +156,7 @@ pinctrl-0 = <&pwm_pins>; ti,timers = <&timer11>; #pwm-cells = <3>; + ti,clock-source = <0x01>; }; /* HS USB Host PHY on PORT 1 */ diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi index fe59dde41b64..28b35ccb3757 100644 --- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi @@ -204,6 +204,7 @@ pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; + fsl,magic-packet; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 98da446aa0f2..48f50161ea21 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -1045,7 +1045,7 @@ <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET_REF>; clock-names = "ipg", "ahb", "ptp"; - gpr = <&gpr>; + fsl,stop-mode = <&gpr 0x34 27>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index d6f831731460..09f21aaee936 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -930,6 +930,7 @@ "enet_clk_ref", "enet_out"; fsl,num-tx-queues = <3>; fsl,num-rx-queues = <3>; + fsl,stop-mode = <&gpr 0x10 3>; status = "disabled"; }; @@ -1039,6 +1040,7 @@ <&clks IMX6SX_CLK_ENET_PTP>; clock-names = "ipg", "ahb", "ptp", "enet_clk_ref", "enet_out"; + fsl,stop-mode = <&gpr 0x10 4>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index 2ccf67c4ac1a..345ae9b0db37 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -520,6 +520,7 @@ "enet_clk_ref", "enet_out"; fsl,num-tx-queues = <1>; fsl,num-rx-queues = <1>; + fsl,stop-mode = <&gpr 0x10 4>; status = "disabled"; }; @@ -856,6 +857,7 @@ "enet_clk_ref", "enet_out"; fsl,num-tx-queues = <1>; fsl,num-rx-queues = <1>; + fsl,stop-mode = <&gpr 0x10 3>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi index 4c22828df55f..cff875b80b60 100644 --- a/arch/arm/boot/dts/imx7d.dtsi +++ b/arch/arm/boot/dts/imx7d.dtsi @@ -153,6 +153,7 @@ "enet_clk_ref", "enet_out"; fsl,num-tx-queues = <3>; fsl,num-rx-queues = <3>; + fsl,stop-mode = <&gpr 0x10 4>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 76e3ffbbbfbf..5bf0b39fa99b 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1190,6 +1190,7 @@ "enet_clk_ref", "enet_out"; fsl,num-tx-queues = <3>; fsl,num-rx-queues = <3>; + fsl,stop-mode = <&gpr 0x10 3>; status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi index f7b82ced4080..381f0e82bb70 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi @@ -65,6 +65,7 @@ pinctrl-0 = <&pwm_pins>; ti,timers = <&timer10>; #pwm-cells = <3>; + ti,clock-source = 
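The imx6/imx7 hunks above replace the bare "gpr" phandle on the FEC nodes with a "fsl,stop-mode" property whose cells point at the GPR syscon plus a register offset and a bit index (for example <&gpr 0x10 3>). As a rough illustration of what such a property encodes, a consumer driver could resolve it with the generic OF/syscon helpers along these lines; this is a hypothetical sketch, not the actual fec_main.c code, and example_set_stop_mode() is an invented name:

#include <linux/bits.h>
#include <linux/err.h>
#include <linux/mfd/syscon.h>
#include <linux/of.h>
#include <linux/regmap.h>

/* Hypothetical helper: set or clear the stop-mode request bit named by a
 * "fsl,stop-mode = <&gpr offset bit>" property on @np. */
static int example_set_stop_mode(struct device_node *np, bool enable)
{
	struct of_phandle_args args;
	struct regmap *gpr;
	int err;

	/* Two argument cells follow the phandle: register offset and bit index. */
	err = of_parse_phandle_with_fixed_args(np, "fsl,stop-mode", 2, 0, &args);
	if (err)
		return err;

	gpr = syscon_node_to_regmap(args.np);
	of_node_put(args.np);
	if (IS_ERR(gpr))
		return PTR_ERR(gpr);

	return regmap_update_bits(gpr, args.args[0], BIT(args.args[1]),
				  enable ? BIT(args.args[1]) : 0);
}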
<0x01>; }; }; diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index 409a758c99f1..ecc45862b4f3 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -150,6 +150,7 @@ compatible = "ti,omap-dmtimer-pwm"; ti,timers = <&timer11>; #pwm-cells = <3>; + ti,clock-source = <0x01>; }; hsusb2_phy: hsusb2_phy { diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi index bfa9ce4c6e69..b9839f86e703 100644 --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi @@ -576,5 +576,33 @@ "legacy"; status = "disabled"; }; + + mdio: mdio@90000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "qcom,ipq4019-mdio"; + reg = <0x90000 0x64>; + status = "disabled"; + + ethphy0: ethernet-phy@0 { + reg = <0>; + }; + + ethphy1: ethernet-phy@1 { + reg = <1>; + }; + + ethphy2: ethernet-phy@2 { + reg = <2>; + }; + + ethphy3: ethernet-phy@3 { + reg = <3>; + }; + + ethphy4: ethernet-phy@4 { + reg = <4>; + }; + }; }; }; diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index bf85d6db4931..0207b6ea6e8a 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -795,6 +795,9 @@ static inline void emit_a32_alu_i(const s8 dst, const u32 val, case BPF_RSH: emit(ARM_LSR_I(rd, rd, val), ctx); break; + case BPF_ARSH: + emit(ARM_ASR_I(rd, rd, val), ctx); + break; case BPF_NEG: emit(ARM_RSB_I(rd, rd, val), ctx); break; @@ -860,8 +863,8 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); - _emit(ARM_COND_MI, ARM_B(0), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx); + _emit(ARM_COND_PL, + ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx); arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); @@ -1408,7 +1411,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU | BPF_RSH | BPF_X: - case BPF_ALU | BPF_ARSH | BPF_K: case BPF_ALU | BPF_ARSH | BPF_X: case BPF_ALU64 | BPF_ADD | BPF_K: case BPF_ALU64 | BPF_ADD | BPF_X: @@ -1465,10 +1467,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_X: goto notyet; - /* dst = dst >> imm */ /* dst = dst << imm */ - case BPF_ALU | BPF_RSH | BPF_K: + /* dst = dst >> imm */ + /* dst = dst >> imm (signed) */ case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_ARSH | BPF_K: if (unlikely(imm > 31)) return -EINVAL; if (imm) diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index fb67cbc589e0..e0b593a1498d 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -94,6 +94,9 @@ #define ARM_INST_LSR_I 0x01a00020 #define ARM_INST_LSR_R 0x01a00030 +#define ARM_INST_ASR_I 0x01a00040 +#define ARM_INST_ASR_R 0x01a00050 + #define ARM_INST_MOV_R 0x01a00000 #define ARM_INST_MOVS_R 0x01b00000 #define ARM_INST_MOV_I 0x03a00000 diff --git a/arch/arm64/boot/dts/mediatek/mt8516.dtsi b/arch/arm64/boot/dts/mediatek/mt8516.dtsi index 2f8adf042195..89af661e7f63 100644 --- a/arch/arm64/boot/dts/mediatek/mt8516.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8516.dtsi @@ -191,6 +191,11 @@ #clock-cells = <1>; }; + pericfg: pericfg@10003050 { + compatible = 
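The arch/arm/net/bpf_jit_32.c and bpf_jit_32.h hunks above add ARM ASR encodings and use them for BPF_ALU | BPF_ARSH | BPF_K, so a 32-bit arithmetic right shift by immediate no longer falls through to the unsupported path. The distinction from the existing BPF_RSH case is logical versus sign-propagating shift; a minimal stand-alone illustration in C (not taken from the patch):

#include <stdint.h>

/* BPF_RSH semantics: logical shift, zeroes enter from the left. */
static uint32_t rsh32(uint32_t dst, unsigned int imm)
{
	return dst >> imm;
}

/* BPF_ARSH semantics: arithmetic shift, the sign bit is replicated.
 * (Right-shifting a negative value is implementation-defined in C, but is an
 * arithmetic shift on the compilers the kernel supports.) */
static int32_t arsh32(int32_t dst, unsigned int imm)
{
	return dst >> imm;
}

/* For instance rsh32(0xFFFFFFF8u, 1) == 0x7FFFFFFC, while arsh32(-8, 1) == -4. */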
"mediatek,mt8516-pericfg", "syscon"; + reg = <0 0x10003050 0 0x1000>; + }; + apmixedsys: apmixedsys@10018000 { compatible = "mediatek,mt8516-apmixedsys", "syscon"; reg = <0 0x10018000 0 0x710>; @@ -401,6 +406,18 @@ status = "disabled"; }; + ethernet: ethernet@11180000 { + compatible = "mediatek,mt8516-eth"; + reg = <0 0x11180000 0 0x1000>; + mediatek,pericfg = <&pericfg>; + interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_LOW>; + clocks = <&topckgen CLK_TOP_RG_ETH>, + <&topckgen CLK_TOP_66M_ETH>, + <&topckgen CLK_TOP_133M_ETH>; + clock-names = "core", "reg", "trans"; + status = "disabled"; + }; + rng: rng@1020c000 { compatible = "mediatek,mt8516-rng", "mediatek,mt7623-rng"; diff --git a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi index a31093d7142b..dfceffe6950a 100644 --- a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi +++ b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi @@ -9,6 +9,7 @@ / { aliases { serial0 = &uart0; + ethernet0 = ðernet; }; chosen { @@ -166,6 +167,24 @@ status = "okay"; }; +ðernet { + pinctrl-names = "default"; + pinctrl-0 = <ðernet_pins_default>; + phy-handle = <ð_phy>; + phy-mode = "rmii"; + mac-address = [00 00 00 00 00 00]; + status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + eth_phy: ethernet-phy@0 { + reg = <0>; + }; + }; +}; + &usb0 { status = "okay"; dr_mode = "peripheral"; @@ -218,4 +237,19 @@ bias-pull-up; }; }; + + ethernet_pins_default: ethernet { + pins_ethernet { + pinmux = <MT8516_PIN_0_EINT0__FUNC_EXT_TXD0>, + <MT8516_PIN_1_EINT1__FUNC_EXT_TXD1>, + <MT8516_PIN_5_EINT5__FUNC_EXT_RXER>, + <MT8516_PIN_6_EINT6__FUNC_EXT_RXC>, + <MT8516_PIN_7_EINT7__FUNC_EXT_RXDV>, + <MT8516_PIN_8_EINT8__FUNC_EXT_RXD0>, + <MT8516_PIN_9_EINT9__FUNC_EXT_RXD1>, + <MT8516_PIN_12_EINT12__FUNC_EXT_TXEN>, + <MT8516_PIN_38_MRG_DI__FUNC_EXT_MDIO>, + <MT8516_PIN_39_MRG_DO__FUNC_EXT_MDC>; + }; + }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 8f926b5234d4..de6bb86c4968 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -1761,6 +1761,8 @@ ipa: ipa@1e40000 { compatible = "qcom,sdm845-ipa"; + + iommus = <&apps_smmu 0x720 0x3>; reg = <0 0x1e40000 0 0x7000>, <0 0x1e47000 0 0x2000>, <0 0x1e04000 0 0x2c000>; diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index 11887c72f23a..0d533d52fcda 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -570,6 +570,28 @@ <0x5>; /* RX_CHAN */ ti,sci-rm-range-rflow = <0x6>; /* GP RFLOW */ }; + + cpts@310d0000 { + compatible = "ti,am65-cpts"; + reg = <0x0 0x310d0000 0x0 0x400>; + reg-names = "cpts"; + clocks = <&main_cpts_mux>; + clock-names = "cpts"; + interrupts-extended = <&intr_main_navss 163 0>; + interrupt-names = "cpts"; + ti,cpts-periodic-outputs = <6>; + ti,cpts-ext-ts-inputs = <8>; + + main_cpts_mux: refclk-mux { + #clock-cells = <0>; + clocks = <&k3_clks 118 5>, <&k3_clks 118 11>, + <&k3_clks 118 6>, <&k3_clks 118 3>, + <&k3_clks 118 8>, <&k3_clks 118 14>, + <&k3_clks 120 3>, <&k3_clks 121 3>; + assigned-clocks = <&main_cpts_mux>; + assigned-clock-parents = <&k3_clks 118 5>; + }; + }; }; main_gpio0: main_gpio0@600000 { diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi index 353d1e2532a7..ae5f813d0cac 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi @@ -247,5 +247,26 @@ clock-names = 
"fck"; bus_freq = <1000000>; }; + + cpts@3d000 { + compatible = "ti,am65-cpts"; + reg = <0x0 0x3d000 0x0 0x400>; + clocks = <&mcu_cpsw_cpts_mux>; + clock-names = "cpts"; + interrupts-extended = <&gic500 GIC_SPI 570 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "cpts"; + ti,cpts-ext-ts-inputs = <4>; + ti,cpts-periodic-outputs = <2>; + + mcu_cpsw_cpts_mux: refclk-mux { + #clock-cells = <0>; + clocks = <&k3_clks 118 5>, <&k3_clks 118 11>, + <&k3_clks 118 6>, <&k3_clks 118 3>, + <&k3_clks 118 8>, <&k3_clks 118 14>, + <&k3_clks 120 3>, <&k3_clks 121 3>; + assigned-clocks = <&mcu_cpsw_cpts_mux>; + assigned-clock-parents = <&k3_clks 118 5>; + }; + }; }; }; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi index 0b9d14b838a1..844a5b50cf09 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -254,6 +254,18 @@ <0x0c>; /* RX_UHCHAN */ ti,sci-rm-range-rflow = <0x00>; /* GP RFLOW */ }; + + cpts@310d0000 { + compatible = "ti,j721e-cpts"; + reg = <0x0 0x310d0000 0x0 0x400>; + reg-names = "cpts"; + clocks = <&k3_clks 201 1>; + clock-names = "cpts"; + interrupts-extended = <&main_navss_intr 201 0>; + interrupt-names = "cpts"; + ti,cpts-periodic-outputs = <6>; + ti,cpts-ext-ts-inputs = <8>; + }; }; main_pmx0: pinmux@11c000 { diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi index 3d6064125b40..dc31bd0434cb 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi @@ -338,5 +338,16 @@ clock-names = "fck"; bus_freq = <1000000>; }; + + cpts@3d000 { + compatible = "ti,am65-cpts"; + reg = <0x0 0x3d000 0x0 0x400>; + clocks = <&k3_clks 18 2>; + clock-names = "cpts"; + interrupts-extended = <&gic500 GIC_SPI 858 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "cpts"; + ti,cpts-ext-ts-inputs = <4>; + ti,cpts-periodic-outputs = <2>; + }; }; }; diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 7c7eeeaab9fa..0c9b5fc4ba0a 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -64,12 +64,14 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); -extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); +extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); +extern void __kvm_enable_ssbs(void); + extern u64 __vgic_v3_get_ich_vtr_el2(void); extern u64 __vgic_v3_read_vmcr(void); extern void __vgic_v3_write_vmcr(u32 vmcr); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 57c0afcf9dcf..abbdf9703e20 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,9 @@ #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) +#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ + KVM_DIRTY_LOG_INITIALLY_SET) + DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern unsigned int kvm_sve_max_vl; @@ -112,12 +115,8 @@ struct kvm_vcpu_fault_info { u64 disr_el1; /* Deferred [SError] Status Register */ }; -/* - * 0 is reserved as an invalid value. - * Order should be kept in sync with the save/restore code. 
- */ enum vcpu_sysreg { - __INVALID_SYSREG__, + __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ CSSELR_EL1, /* Cache Size Selection Register */ SCTLR_EL1, /* System Control Register */ @@ -415,6 +414,8 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u64 halt_successful_poll; u64 halt_attempted_poll; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; u64 halt_poll_invalid; u64 halt_wakeup; u64 hvc_exit_stat; @@ -530,39 +531,6 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr(); } -void __kvm_enable_ssbs(void); - -static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, - unsigned long hyp_stack_ptr, - unsigned long vector_ptr) -{ - /* - * Calculate the raw per-cpu offset without a translation from the - * kernel's mapping to the linear mapping, and store it in tpidr_el2 - * so that we can use adr_l to access per-cpu variables in EL2. - */ - u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) - - (u64)kvm_ksym_ref(kvm_host_data)); - - /* - * Call initialization code, and switch to the full blown HYP code. - * If the cpucaps haven't been finalized yet, something has gone very - * wrong, and hyp will crash and burn when it uses any - * cpus_have_const_cap() wrapper. - */ - BUG_ON(!system_capabilities_finalized()); - __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); - - /* - * Disabling SSBD on a non-VHE system requires us to enable SSBS - * at EL2. - */ - if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) && - arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { - kvm_call_hyp(__kvm_enable_ssbs); - } -} - static inline bool kvm_arch_requires_vhe(void) { /* @@ -594,8 +562,6 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -static inline void __cpu_init_stage2(void) {} - /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 015883671ec3..ce3080834bfa 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -55,12 +55,12 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); -void __vgic_v3_save_state(struct kvm_vcpu *vcpu); -void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); -void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu); -void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu); -void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu); -void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu); +void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); void __timer_enable_traps(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 85da6befe76e..324c8483d2b9 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -363,8 +363,6 @@ static inline void __kvm_flush_dcache_pud(pud_t pud) } } -#define kvm_virt_to_phys(x) __pa_symbol(x) - void kvm_set_way_flush(struct kvm_vcpu *vcpu); void 
kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); @@ -473,7 +471,7 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, extern void *__kvm_bp_vect_base; extern int __kvm_harden_el2_vector_slot; -/* This is only called on a VHE system */ +/* This is called on both VHE and !VHE systems */ static inline void *kvm_get_hyp_vector(void) { struct bp_hardening_data *data = arm64_get_bp_hardening_data(); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 2172ec7594ba..953b6a1ce549 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -35,7 +35,7 @@ #define GIC_PRIO_PSR_I_SET (1 << 4) /* Additional SPSR bits not exposed in the UABI */ - +#define PSR_MODE_THREAD_BIT (1 << 0) #define PSR_IL_BIT (1 << 20) /* AArch32-specific ptrace requests */ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 61fd26752adc..5051b388c654 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -85,7 +85,7 @@ static inline bool is_kernel_in_hyp_mode(void) static __always_inline bool has_vhe(void) { - if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN)) + if (cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN)) return true; return false; diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c19aa81ddc8c..7364de008bab 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -203,7 +203,7 @@ static void __init register_insn_emulation(struct insn_emulation_ops *ops) } static int emulation_proc_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { int ret = 0; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 3539d7092612..0577e2142284 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -97,7 +97,7 @@ int main(void) DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); -#ifdef CONFIG_KVM_ARM_HOST +#ifdef CONFIG_KVM DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b0ce6bf14f6a..ad06d6802d2e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -234,7 +234,7 @@ static int detect_harden_bp_fw(void) smccc_end = NULL; break; -#if IS_ENABLED(CONFIG_KVM_ARM_HOST) +#if IS_ENABLED(CONFIG_KVM) case SMCCC_CONDUIT_SMC: cb = call_smc_arch_workaround_1; smccc_start = __smccc_workaround_1_smc; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 94289d126993..35cb5e66c504 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -341,8 +341,7 @@ static unsigned int find_supported_vector_length(unsigned int vl) #ifdef CONFIG_SYSCTL static int sve_proc_do_default_vl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; int vl = sve_default_vl; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 04b1ca0d7aba..4b6f4999d06a 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -422,7 +422,7 @@ static void __init hyp_mode_check(void) "CPU: CPUs started in inconsistent modes"); 
else pr_info("CPU: All CPU(s) started at EL1\n"); - if (IS_ENABLED(CONFIG_KVM_ARM_HOST)) + if (IS_ENABLED(CONFIG_KVM)) kvm_compute_layout(); } diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 449386d76441..f1c1f981482c 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -3,7 +3,6 @@ # KVM configuration # -source "virt/kvm/Kconfig" source "virt/lib/Kconfig" menuconfig VIRTUALIZATION @@ -18,7 +17,7 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION -config KVM +menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" depends on OF # for TASKSTATS/TASK_DELAY_ACCT: @@ -28,13 +27,11 @@ config KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO - select KVM_ARM_HOST select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD - select KVM_ARM_PMU if HW_PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING @@ -45,23 +42,24 @@ config KVM select TASK_DELAY_ACCT ---help--- Support hosting virtualized guest machines. - We don't support KVM with 16K page tables yet, due to the multiple - levels of fake page tables. If unsure, say N. -config KVM_ARM_HOST - bool - ---help--- - Provides host support for ARM processors. +if KVM + +source "virt/kvm/Kconfig" config KVM_ARM_PMU - bool + bool "Virtual Performance Monitoring Unit (PMU) support" + depends on HW_PERF_EVENTS + default y ---help--- Adds support for a virtual Performance Monitoring Unit (PMU) in virtual machines. config KVM_INDIRECT_VECTORS - def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS) + def_bool HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS + +endif # KVM endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 5ffbdc39e780..8d3d9513cbfe 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -3,37 +3,25 @@ # Makefile for Kernel-based Virtual Machine module # -ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic +ccflags-y += -I $(srctree)/$(src) KVM=../../../virt/kvm -obj-$(CONFIG_KVM_ARM_HOST) += kvm.o -obj-$(CONFIG_KVM_ARM_HOST) += hyp/ +obj-$(CONFIG_KVM) += kvm.o +obj-$(CONFIG_KVM) += hyp/ -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o +kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ + $(KVM)/vfio.o $(KVM)/irqchip.o \ + arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ + inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \ + guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \ + vgic-sys-reg-v3.o fpsimd.o pmu.o \ + aarch32.o arch_timer.o \ + vgic/vgic.o vgic/vgic-init.o \ + vgic/vgic-irqfd.o vgic/vgic-v2.o \ + vgic/vgic-v3.o vgic/vgic-v4.o \ + vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ + vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ + vgic/vgic-its.o vgic/vgic-debug.o -kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o -kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o -kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o pmu.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o - -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o 
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-debug.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o -kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o +kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o diff --git a/arch/arm64/kvm/aarch32.c b/arch/arm64/kvm/aarch32.c new file mode 100644 index 000000000000..0a356aa91aa1 --- /dev/null +++ b/arch/arm64/kvm/aarch32.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <linux/bits.h> +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +#define DFSR_FSC_EXTABT_LPAE 0x10 +#define DFSR_FSC_EXTABT_nLPAE 0x08 +#define DFSR_LPAE BIT(9) + +/* + * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. + */ +static const u8 return_offsets[8][2] = { + [0] = { 0, 0 }, /* Reset, unused */ + [1] = { 4, 2 }, /* Undefined */ + [2] = { 0, 0 }, /* SVC, unused */ + [3] = { 4, 4 }, /* Prefetch abort */ + [4] = { 8, 8 }, /* Data abort */ + [5] = { 0, 0 }, /* HVC, unused */ + [6] = { 4, 4 }, /* IRQ, unused */ + [7] = { 4, 4 }, /* FIQ, unused */ +}; + +/* + * When an exception is taken, most CPSR fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). + * + * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with + * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was + * obsoleted by the ARMv7 virtualization extensions and is RES0. + * + * For the SPSR layout seen from AArch32, see: + * - ARM DDI 0406C.d, page B1-1148 + * - ARM DDI 0487E.a, page G8-6264 + * + * For the SPSR_ELx layout for AArch32 seen from AArch64, see: + * - ARM DDI 0487E.a, page C5-426 + * + * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from + * MSB to LSB. 
+ */ +static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) +{ + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_AA32_N_BIT); + new |= (old & PSR_AA32_Z_BIT); + new |= (old & PSR_AA32_C_BIT); + new |= (old & PSR_AA32_V_BIT); + new |= (old & PSR_AA32_Q_BIT); + + // CPSR.IT[7:0] are set to zero upon any exception + // See ARM DDI 0487E.a, section G1.12.3 + // See ARM DDI 0406C.d, section B1.8.3 + + new |= (old & PSR_AA32_DIT_BIT); + + // CPSR.SSBS is set to SCTLR.DSSBS upon any exception + // See ARM DDI 0487E.a, page G8-6244 + if (sctlr & BIT(31)) + new |= PSR_AA32_SSBS_BIT; + + // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 + // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page G8-6246 + new |= (old & PSR_AA32_PAN_BIT); + if (!(sctlr & BIT(23))) + new |= PSR_AA32_PAN_BIT; + + // SS does not exist in AArch32, so ignore + + // CPSR.IL is set to zero upon any exception + // See ARM DDI 0487E.a, page G1-5527 + + new |= (old & PSR_AA32_GE_MASK); + + // CPSR.IT[7:0] are set to zero upon any exception + // See prior comment above + + // CPSR.E is set to SCTLR.EE upon any exception + // See ARM DDI 0487E.a, page G8-6245 + // See ARM DDI 0406C.d, page B4-1701 + if (sctlr & BIT(25)) + new |= PSR_AA32_E_BIT; + + // CPSR.A is unchanged upon an exception to Undefined, Supervisor + // CPSR.A is set upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_A_BIT); + if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) + new |= PSR_AA32_A_BIT; + + // CPSR.I is set upon any exception + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= PSR_AA32_I_BIT; + + // CPSR.F is set upon an exception to FIQ + // CPSR.F is unchanged upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_F_BIT); + if (mode == PSR_AA32_MODE_FIQ) + new |= PSR_AA32_F_BIT; + + // CPSR.T is set to SCTLR.TE upon any exception + // See ARM DDI 0487E.a, page G8-5514 + // See ARM DDI 0406C.d, page B1-1181 + if (sctlr & BIT(30)) + new |= PSR_AA32_T_BIT; + + new |= mode; + + return new; +} + +static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) +{ + unsigned long spsr = *vcpu_cpsr(vcpu); + bool is_thumb = (spsr & PSR_AA32_T_BIT); + u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + + *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); + + /* Note: These now point to the banked copies */ + vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); + *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + + /* Branch to exception vector */ + if (sctlr & (1 << 13)) + vect_offset += 0xffff0000; + else /* always have security exceptions */ + vect_offset += vcpu_cp15(vcpu, c12_VBAR); + + *vcpu_pc(vcpu) = vect_offset; +} + +void kvm_inject_undef32(struct kvm_vcpu *vcpu) +{ + prepare_fault32(vcpu, PSR_AA32_MODE_UND, 4); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. 
+ */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, + unsigned long addr) +{ + u32 vect_offset; + u32 *far, *fsr; + bool is_lpae; + + if (is_pabt) { + vect_offset = 12; + far = &vcpu_cp15(vcpu, c6_IFAR); + fsr = &vcpu_cp15(vcpu, c5_IFSR); + } else { /* !iabt */ + vect_offset = 16; + far = &vcpu_cp15(vcpu, c6_DFAR); + fsr = &vcpu_cp15(vcpu, c5_DFSR); + } + + prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); + + *far = addr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); + if (is_lpae) { + *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; + } else { + /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + *fsr = DFSR_FSC_EXTABT_nLPAE; + } +} + +void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt32(vcpu, false, addr); +} + +void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt32(vcpu, true, addr); +} diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c new file mode 100644 index 000000000000..a1fe0ea3254e --- /dev/null +++ b/arch/arm64/kvm/arch_timer.c @@ -0,0 +1,1171 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/cpu.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/uaccess.h> + +#include <clocksource/arm_arch_timer.h> +#include <asm/arch_timer.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +#include <kvm/arm_vgic.h> +#include <kvm/arm_arch_timer.h> + +#include "trace.h" + +static struct timecounter *timecounter; +static unsigned int host_vtimer_irq; +static unsigned int host_ptimer_irq; +static u32 host_vtimer_irq_flags; +static u32 host_ptimer_irq_flags; + +static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); + +static const struct kvm_irq_level default_ptimer_irq = { + .irq = 30, + .level = 1, +}; + +static const struct kvm_irq_level default_vtimer_irq = { + .irq = 27, + .level = 1, +}; + +static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, + struct arch_timer_context *timer_ctx); +static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); +static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer, + enum kvm_arch_timer_regs treg, + u64 val); +static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer, + enum kvm_arch_timer_regs treg); + +u64 kvm_phys_timer_read(void) +{ + return timecounter->cc->read(timecounter->cc); +} + +static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) +{ + if (has_vhe()) { + map->direct_vtimer = vcpu_vtimer(vcpu); + map->direct_ptimer = vcpu_ptimer(vcpu); + map->emul_ptimer = NULL; + } else { + map->direct_vtimer = vcpu_vtimer(vcpu); + map->direct_ptimer = NULL; + map->emul_ptimer = vcpu_ptimer(vcpu); + } + + trace_kvm_get_timer_map(vcpu->vcpu_id, map); +} + +static inline bool userspace_irqchip(struct kvm *kvm) +{ + return static_branch_unlikely(&userspace_irqchip_in_use) && + unlikely(!irqchip_in_kernel(kvm)); +} + +static void soft_timer_start(struct hrtimer *hrt, u64 ns) +{ + hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns), + HRTIMER_MODE_ABS_HARD); +} + +static void soft_timer_cancel(struct hrtimer *hrt) +{ + hrtimer_cancel(hrt); +} + +static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) +{ + struct kvm_vcpu *vcpu = 
*(struct kvm_vcpu **)dev_id; + struct arch_timer_context *ctx; + struct timer_map map; + + /* + * We may see a timer interrupt after vcpu_put() has been called which + * sets the CPU's vcpu pointer to NULL, because even though the timer + * has been disabled in timer_save_state(), the hardware interrupt + * signal may not have been retired from the interrupt controller yet. + */ + if (!vcpu) + return IRQ_HANDLED; + + get_timer_map(vcpu, &map); + + if (irq == host_vtimer_irq) + ctx = map.direct_vtimer; + else + ctx = map.direct_ptimer; + + if (kvm_timer_should_fire(ctx)) + kvm_timer_update_irq(vcpu, true, ctx); + + if (userspace_irqchip(vcpu->kvm) && + !static_branch_unlikely(&has_gic_active_state)) + disable_percpu_irq(host_vtimer_irq); + + return IRQ_HANDLED; +} + +static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) +{ + u64 cval, now; + + cval = timer_ctx->cnt_cval; + now = kvm_phys_timer_read() - timer_ctx->cntvoff; + + if (now < cval) { + u64 ns; + + ns = cyclecounter_cyc2ns(timecounter->cc, + cval - now, + timecounter->mask, + &timecounter->frac); + return ns; + } + + return 0; +} + +static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) +{ + WARN_ON(timer_ctx && timer_ctx->loaded); + return timer_ctx && + !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && + (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); +} + +/* + * Returns the earliest expiration time in ns among guest timers. + * Note that it will return 0 if none of timers can fire. + */ +static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) +{ + u64 min_delta = ULLONG_MAX; + int i; + + for (i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i]; + + WARN(ctx->loaded, "timer %d loaded\n", i); + if (kvm_timer_irq_can_fire(ctx)) + min_delta = min(min_delta, kvm_timer_compute_delta(ctx)); + } + + /* If none of timers can fire, then return 0 */ + if (min_delta == ULLONG_MAX) + return 0; + + return min_delta; +} + +static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) +{ + struct arch_timer_cpu *timer; + struct kvm_vcpu *vcpu; + u64 ns; + + timer = container_of(hrt, struct arch_timer_cpu, bg_timer); + vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); + + /* + * Check that the timer has really expired from the guest's + * PoV (NTP on the host may have forced it to expire + * early). If we should have slept longer, restart it. + */ + ns = kvm_timer_earliest_exp(vcpu); + if (unlikely(ns)) { + hrtimer_forward_now(hrt, ns_to_ktime(ns)); + return HRTIMER_RESTART; + } + + kvm_vcpu_wake_up(vcpu); + return HRTIMER_NORESTART; +} + +static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) +{ + struct arch_timer_context *ctx; + struct kvm_vcpu *vcpu; + u64 ns; + + ctx = container_of(hrt, struct arch_timer_context, hrtimer); + vcpu = ctx->vcpu; + + trace_kvm_timer_hrtimer_expire(ctx); + + /* + * Check that the timer has really expired from the guest's + * PoV (NTP on the host may have forced it to expire + * early). If not ready, schedule for a later time. 
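As a worked example of kvm_timer_compute_delta() above (the frequency here is purely hypothetical): with a 50 MHz counter one tick is 20 ns, so cval - now == 5000 cycles converts to roughly 100000 ns and the soft timer is armed about 100 us in the future; once now has reached cval the function returns 0, meaning the timer has already expired.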
+ */ + ns = kvm_timer_compute_delta(ctx); + if (unlikely(ns)) { + hrtimer_forward_now(hrt, ns_to_ktime(ns)); + return HRTIMER_RESTART; + } + + kvm_timer_update_irq(vcpu, true, ctx); + return HRTIMER_NORESTART; +} + +static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) +{ + enum kvm_arch_timers index; + u64 cval, now; + + if (!timer_ctx) + return false; + + index = arch_timer_ctx_index(timer_ctx); + + if (timer_ctx->loaded) { + u32 cnt_ctl = 0; + + switch (index) { + case TIMER_VTIMER: + cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); + break; + case TIMER_PTIMER: + cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); + break; + case NR_KVM_TIMERS: + /* GCC is braindead */ + cnt_ctl = 0; + break; + } + + return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) && + (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) && + !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); + } + + if (!kvm_timer_irq_can_fire(timer_ctx)) + return false; + + cval = timer_ctx->cnt_cval; + now = kvm_phys_timer_read() - timer_ctx->cntvoff; + + return cval <= now; +} + +bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) +{ + struct timer_map map; + + get_timer_map(vcpu, &map); + + return kvm_timer_should_fire(map.direct_vtimer) || + kvm_timer_should_fire(map.direct_ptimer) || + kvm_timer_should_fire(map.emul_ptimer); +} + +/* + * Reflect the timer output level into the kvm_run structure + */ +void kvm_timer_update_run(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct kvm_sync_regs *regs = &vcpu->run->s.regs; + + /* Populate the device bitmap with the timer states */ + regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER | + KVM_ARM_DEV_EL1_PTIMER); + if (kvm_timer_should_fire(vtimer)) + regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER; + if (kvm_timer_should_fire(ptimer)) + regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER; +} + +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, + struct arch_timer_context *timer_ctx) +{ + int ret; + + timer_ctx->irq.level = new_level; + trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, + timer_ctx->irq.level); + + if (!userspace_irqchip(vcpu->kvm)) { + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + timer_ctx->irq.irq, + timer_ctx->irq.level, + timer_ctx); + WARN_ON(ret); + } +} + +/* Only called for a fully emulated timer */ +static void timer_emulate(struct arch_timer_context *ctx) +{ + bool should_fire = kvm_timer_should_fire(ctx); + + trace_kvm_timer_emulate(ctx, should_fire); + + if (should_fire != ctx->irq.level) { + kvm_timer_update_irq(ctx->vcpu, should_fire, ctx); + return; + } + + /* + * If the timer can fire now, we don't need to have a soft timer + * scheduled for the future. If the timer cannot fire at all, + * then we also don't need a soft timer. 
+ */ + if (!kvm_timer_irq_can_fire(ctx)) { + soft_timer_cancel(&ctx->hrtimer); + return; + } + + soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx)); +} + +static void timer_save_state(struct arch_timer_context *ctx) +{ + struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + enum kvm_arch_timers index = arch_timer_ctx_index(ctx); + unsigned long flags; + + if (!timer->enabled) + return; + + local_irq_save(flags); + + if (!ctx->loaded) + goto out; + + switch (index) { + case TIMER_VTIMER: + ctx->cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); + ctx->cnt_cval = read_sysreg_el0(SYS_CNTV_CVAL); + + /* Disable the timer */ + write_sysreg_el0(0, SYS_CNTV_CTL); + isb(); + + break; + case TIMER_PTIMER: + ctx->cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); + ctx->cnt_cval = read_sysreg_el0(SYS_CNTP_CVAL); + + /* Disable the timer */ + write_sysreg_el0(0, SYS_CNTP_CTL); + isb(); + + break; + case NR_KVM_TIMERS: + BUG(); + } + + trace_kvm_timer_save_state(ctx); + + ctx->loaded = false; +out: + local_irq_restore(flags); +} + +/* + * Schedule the background timer before calling kvm_vcpu_block, so that this + * thread is removed from its waitqueue and made runnable when there's a timer + * interrupt to handle. + */ +static void kvm_timer_blocking(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct timer_map map; + + get_timer_map(vcpu, &map); + + /* + * If no timers are capable of raising interrupts (disabled or + * masked), then there's no more work for us to do. + */ + if (!kvm_timer_irq_can_fire(map.direct_vtimer) && + !kvm_timer_irq_can_fire(map.direct_ptimer) && + !kvm_timer_irq_can_fire(map.emul_ptimer)) + return; + + /* + * At least one guest time will expire. Schedule a background timer. + * Set the earliest expiration time among the guest timers. + */ + soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); +} + +static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + + soft_timer_cancel(&timer->bg_timer); +} + +static void timer_restore_state(struct arch_timer_context *ctx) +{ + struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + enum kvm_arch_timers index = arch_timer_ctx_index(ctx); + unsigned long flags; + + if (!timer->enabled) + return; + + local_irq_save(flags); + + if (ctx->loaded) + goto out; + + switch (index) { + case TIMER_VTIMER: + write_sysreg_el0(ctx->cnt_cval, SYS_CNTV_CVAL); + isb(); + write_sysreg_el0(ctx->cnt_ctl, SYS_CNTV_CTL); + break; + case TIMER_PTIMER: + write_sysreg_el0(ctx->cnt_cval, SYS_CNTP_CVAL); + isb(); + write_sysreg_el0(ctx->cnt_ctl, SYS_CNTP_CTL); + break; + case NR_KVM_TIMERS: + BUG(); + } + + trace_kvm_timer_restore_state(ctx); + + ctx->loaded = true; +out: + local_irq_restore(flags); +} + +static void set_cntvoff(u64 cntvoff) +{ + kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff); +} + +static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active) +{ + int r; + r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active); + WARN_ON(r); +} + +static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) +{ + struct kvm_vcpu *vcpu = ctx->vcpu; + bool phys_active = false; + + /* + * Update the timer output so that it is likely to match the + * state we're about to restore. If the timer expires between + * this point and the register restoration, we'll take the + * interrupt anyway. 
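set_cntvoff() above can pass the whole 64-bit offset in a single hyp-call argument thanks to the kvm_asm.h change earlier in this diff, where __kvm_timer_set_cntvoff(u64 cntvoff) replaces the old u32 low/high pair. Roughly, the call site goes from the split form to the direct one; the "before" line is a reconstruction for illustration, not quoted from the removed code:

	/* before: offset split for the 32-bit-compatible hyp ABI (reconstructed) */
	kvm_call_hyp(__kvm_timer_set_cntvoff, lower_32_bits(cntvoff), upper_32_bits(cntvoff));
	/* after: single u64 argument, as in the hunk above */
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);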
+ */ + kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); + + if (irqchip_in_kernel(vcpu->kvm)) + phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq); + + phys_active |= ctx->irq.level; + + set_timer_irq_phys_active(ctx, phys_active); +} + +static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + /* + * Update the timer output so that it is likely to match the + * state we're about to restore. If the timer expires between + * this point and the register restoration, we'll take the + * interrupt anyway. + */ + kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer); + + /* + * When using a userspace irqchip with the architected timers and a + * host interrupt controller that doesn't support an active state, we + * must still prevent continuously exiting from the guest, and + * therefore mask the physical interrupt by disabling it on the host + * interrupt controller when the virtual level is high, such that the + * guest can make forward progress. Once we detect the output level + * being de-asserted, we unmask the interrupt again so that we exit + * from the guest when the timer fires. + */ + if (vtimer->irq.level) + disable_percpu_irq(host_vtimer_irq); + else + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); +} + +void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct timer_map map; + + if (unlikely(!timer->enabled)) + return; + + get_timer_map(vcpu, &map); + + if (static_branch_likely(&has_gic_active_state)) { + kvm_timer_vcpu_load_gic(map.direct_vtimer); + if (map.direct_ptimer) + kvm_timer_vcpu_load_gic(map.direct_ptimer); + } else { + kvm_timer_vcpu_load_nogic(vcpu); + } + + set_cntvoff(map.direct_vtimer->cntvoff); + + kvm_timer_unblocking(vcpu); + + timer_restore_state(map.direct_vtimer); + if (map.direct_ptimer) + timer_restore_state(map.direct_ptimer); + + if (map.emul_ptimer) + timer_emulate(map.emul_ptimer); +} + +bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct kvm_sync_regs *sregs = &vcpu->run->s.regs; + bool vlevel, plevel; + + if (likely(irqchip_in_kernel(vcpu->kvm))) + return false; + + vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER; + plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER; + + return kvm_timer_should_fire(vtimer) != vlevel || + kvm_timer_should_fire(ptimer) != plevel; +} + +void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct timer_map map; + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); + + if (unlikely(!timer->enabled)) + return; + + get_timer_map(vcpu, &map); + + timer_save_state(map.direct_vtimer); + if (map.direct_ptimer) + timer_save_state(map.direct_ptimer); + + /* + * Cancel soft timer emulation, because the only case where we + * need it after a vcpu_put is in the context of a sleeping VCPU, and + * in that case we already factor in the deadline for the physical + * timer when scheduling the bg_timer. + * + * In any case, we re-schedule the hrtimer for the physical timer when + * coming back to the VCPU thread in kvm_timer_vcpu_load(). 
+ */ + if (map.emul_ptimer) + soft_timer_cancel(&map.emul_ptimer->hrtimer); + + if (rcuwait_active(wait)) + kvm_timer_blocking(vcpu); + + /* + * The kernel may decide to run userspace after calling vcpu_put, so + * we reset cntvoff to 0 to ensure a consistent read between user + * accesses to the virtual counter and kernel access to the physical + * counter of non-VHE case. For VHE, the virtual counter uses a fixed + * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. + */ + set_cntvoff(0); +} + +/* + * With a userspace irqchip we have to check if the guest de-asserted the + * timer and if so, unmask the timer irq signal on the host interrupt + * controller to ensure that we see future timer signals. + */ +static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + if (!kvm_timer_should_fire(vtimer)) { + kvm_timer_update_irq(vcpu, false, vtimer); + if (static_branch_likely(&has_gic_active_state)) + set_timer_irq_phys_active(vtimer, false); + else + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); + } +} + +void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + + if (unlikely(!timer->enabled)) + return; + + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + unmask_vtimer_irq_user(vcpu); +} + +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct timer_map map; + + get_timer_map(vcpu, &map); + + /* + * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 + * and to 0 for ARMv7. We provide an implementation that always + * resets the timer to be disabled and unmasked and is compliant with + * the ARMv7 architecture. + */ + vcpu_vtimer(vcpu)->cnt_ctl = 0; + vcpu_ptimer(vcpu)->cnt_ctl = 0; + + if (timer->enabled) { + kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu)); + kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu)); + + if (irqchip_in_kernel(vcpu->kvm)) { + kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq); + if (map.direct_ptimer) + kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq); + } + } + + if (map.emul_ptimer) + soft_timer_cancel(&map.emul_ptimer->hrtimer); + + return 0; +} + +/* Make the updates of cntvoff for all vtimer contexts atomic */ +static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) +{ + int i; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(i, tmp, kvm) + vcpu_vtimer(tmp)->cntvoff = cntvoff; + + /* + * When called from the vcpu create path, the CPU being created is not + * included in the loop above, so we just set it here as well. + */ + vcpu_vtimer(vcpu)->cntvoff = cntvoff; + mutex_unlock(&kvm->lock); +} + +void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + + /* Synchronize cntvoff across all vtimers of a VM. 
*/ + update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); + ptimer->cntvoff = 0; + + hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + timer->bg_timer.function = kvm_bg_timer_expire; + + hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + vtimer->hrtimer.function = kvm_hrtimer_expire; + ptimer->hrtimer.function = kvm_hrtimer_expire; + + vtimer->irq.irq = default_vtimer_irq.irq; + ptimer->irq.irq = default_ptimer_irq.irq; + + vtimer->host_timer_irq = host_vtimer_irq; + ptimer->host_timer_irq = host_ptimer_irq; + + vtimer->host_timer_irq_flags = host_vtimer_irq_flags; + ptimer->host_timer_irq_flags = host_ptimer_irq_flags; + + vtimer->vcpu = vcpu; + ptimer->vcpu = vcpu; +} + +static void kvm_timer_init_interrupt(void *info) +{ + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); + enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags); +} + +int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + struct arch_timer_context *timer; + + switch (regid) { + case KVM_REG_ARM_TIMER_CTL: + timer = vcpu_vtimer(vcpu); + kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); + break; + case KVM_REG_ARM_TIMER_CNT: + timer = vcpu_vtimer(vcpu); + update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); + break; + case KVM_REG_ARM_TIMER_CVAL: + timer = vcpu_vtimer(vcpu); + kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); + break; + case KVM_REG_ARM_PTIMER_CTL: + timer = vcpu_ptimer(vcpu); + kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); + break; + case KVM_REG_ARM_PTIMER_CVAL: + timer = vcpu_ptimer(vcpu); + kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); + break; + + default: + return -1; + } + + return 0; +} + +static u64 read_timer_ctl(struct arch_timer_context *timer) +{ + /* + * Set ISTATUS bit if it's expired. + * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is + * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit + * regardless of ENABLE bit for our implementation convenience. 
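The KVM_REG_ARM_TIMER_CNT case above relies on the architectural relation guest CNTVCT = host CNTPCT - CNTVOFF: choosing cntvoff = kvm_phys_timer_read() - value makes the guest read back approximately value (plus whatever time has since elapsed). A tiny sketch of the invariant, reusing the helpers defined in this file:

	/* userspace wants the guest to observe 'value' on CNTVCT right now */
	update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);

	/* a later guest-visible read is then */
	u64 guest_cnt = kvm_phys_timer_read() - vcpu_vtimer(vcpu)->cntvoff; /* ~= value + elapsed */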
+ */ + if (!kvm_timer_compute_delta(timer)) + return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT; + else + return timer->cnt_ctl; +} + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + switch (regid) { + case KVM_REG_ARM_TIMER_CTL: + return kvm_arm_timer_read(vcpu, + vcpu_vtimer(vcpu), TIMER_REG_CTL); + case KVM_REG_ARM_TIMER_CNT: + return kvm_arm_timer_read(vcpu, + vcpu_vtimer(vcpu), TIMER_REG_CNT); + case KVM_REG_ARM_TIMER_CVAL: + return kvm_arm_timer_read(vcpu, + vcpu_vtimer(vcpu), TIMER_REG_CVAL); + case KVM_REG_ARM_PTIMER_CTL: + return kvm_arm_timer_read(vcpu, + vcpu_ptimer(vcpu), TIMER_REG_CTL); + case KVM_REG_ARM_PTIMER_CNT: + return kvm_arm_timer_read(vcpu, + vcpu_ptimer(vcpu), TIMER_REG_CNT); + case KVM_REG_ARM_PTIMER_CVAL: + return kvm_arm_timer_read(vcpu, + vcpu_ptimer(vcpu), TIMER_REG_CVAL); + } + return (u64)-1; +} + +static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer, + enum kvm_arch_timer_regs treg) +{ + u64 val; + + switch (treg) { + case TIMER_REG_TVAL: + val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff; + val &= lower_32_bits(val); + break; + + case TIMER_REG_CTL: + val = read_timer_ctl(timer); + break; + + case TIMER_REG_CVAL: + val = timer->cnt_cval; + break; + + case TIMER_REG_CNT: + val = kvm_phys_timer_read() - timer->cntvoff; + break; + + default: + BUG(); + } + + return val; +} + +u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, + enum kvm_arch_timers tmr, + enum kvm_arch_timer_regs treg) +{ + u64 val; + + preempt_disable(); + kvm_timer_vcpu_put(vcpu); + + val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg); + + kvm_timer_vcpu_load(vcpu); + preempt_enable(); + + return val; +} + +static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer, + enum kvm_arch_timer_regs treg, + u64 val) +{ + switch (treg) { + case TIMER_REG_TVAL: + timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val; + break; + + case TIMER_REG_CTL: + timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT; + break; + + case TIMER_REG_CVAL: + timer->cnt_cval = val; + break; + + default: + BUG(); + } +} + +void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, + enum kvm_arch_timers tmr, + enum kvm_arch_timer_regs treg, + u64 val) +{ + preempt_disable(); + kvm_timer_vcpu_put(vcpu); + + kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val); + + kvm_timer_vcpu_load(vcpu); + preempt_enable(); +} + +static int kvm_timer_starting_cpu(unsigned int cpu) +{ + kvm_timer_init_interrupt(NULL); + return 0; +} + +static int kvm_timer_dying_cpu(unsigned int cpu) +{ + disable_percpu_irq(host_vtimer_irq); + return 0; +} + +int kvm_timer_hyp_init(bool has_gic) +{ + struct arch_timer_kvm_info *info; + int err; + + info = arch_timer_get_kvm_info(); + timecounter = &info->timecounter; + + if (!timecounter->cc) { + kvm_err("kvm_arch_timer: uninitialized timecounter\n"); + return -ENODEV; + } + + /* First, do the virtual EL1 timer irq */ + + if (info->virtual_irq <= 0) { + kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", + info->virtual_irq); + return -ENODEV; + } + host_vtimer_irq = info->virtual_irq; + + host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); + if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && + host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { + kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n", + host_vtimer_irq); + host_vtimer_irq_flags = IRQF_TRIGGER_LOW; + } + + err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, + "kvm guest vtimer", 
kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n", + host_vtimer_irq, err); + return err; + } + + if (has_gic) { + err = irq_set_vcpu_affinity(host_vtimer_irq, + kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); + goto out_free_irq; + } + + static_branch_enable(&has_gic_active_state); + } + + kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); + + /* Now let's do the physical EL1 timer irq */ + + if (info->physical_irq > 0) { + host_ptimer_irq = info->physical_irq; + host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq); + if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH && + host_ptimer_irq_flags != IRQF_TRIGGER_LOW) { + kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n", + host_ptimer_irq); + host_ptimer_irq_flags = IRQF_TRIGGER_LOW; + } + + err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler, + "kvm guest ptimer", kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n", + host_ptimer_irq, err); + return err; + } + + if (has_gic) { + err = irq_set_vcpu_affinity(host_ptimer_irq, + kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); + goto out_free_irq; + } + } + + kvm_debug("physical timer IRQ%d\n", host_ptimer_irq); + } else if (has_vhe()) { + kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n", + info->physical_irq); + err = -ENODEV; + goto out_free_irq; + } + + cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, + "kvm/arm/timer:starting", kvm_timer_starting_cpu, + kvm_timer_dying_cpu); + return 0; +out_free_irq: + free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus()); + return err; +} + +void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + + soft_timer_cancel(&timer->bg_timer); +} + +static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) +{ + int vtimer_irq, ptimer_irq; + int i, ret; + + vtimer_irq = vcpu_vtimer(vcpu)->irq.irq; + ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); + if (ret) + return false; + + ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; + ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); + if (ret) + return false; + + kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { + if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq || + vcpu_ptimer(vcpu)->irq.irq != ptimer_irq) + return false; + } + + return true; +} + +bool kvm_arch_timer_get_input_level(int vintid) +{ + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); + struct arch_timer_context *timer; + + if (vintid == vcpu_vtimer(vcpu)->irq.irq) + timer = vcpu_vtimer(vcpu); + else if (vintid == vcpu_ptimer(vcpu)->irq.irq) + timer = vcpu_ptimer(vcpu); + else + BUG(); + + return kvm_timer_should_fire(timer); +} + +int kvm_timer_enable(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct timer_map map; + int ret; + + if (timer->enabled) + return 0; + + /* Without a VGIC we do not map virtual IRQs to physical IRQs */ + if (!irqchip_in_kernel(vcpu->kvm)) + goto no_vgic; + + if (!vgic_initialized(vcpu->kvm)) + return -ENODEV; + + if (!timer_irqs_are_valid(vcpu)) { + kvm_debug("incorrectly configured timer irqs\n"); + return -EINVAL; + } + + get_timer_map(vcpu, &map); + + ret = kvm_vgic_map_phys_irq(vcpu, + map.direct_vtimer->host_timer_irq, + map.direct_vtimer->irq.irq, + kvm_arch_timer_get_input_level); + if (ret) + return ret; + + if (map.direct_ptimer) { + ret = kvm_vgic_map_phys_irq(vcpu, 
+ map.direct_ptimer->host_timer_irq, + map.direct_ptimer->irq.irq, + kvm_arch_timer_get_input_level); + } + + if (ret) + return ret; + +no_vgic: + timer->enabled = 1; + return 0; +} + +/* + * On VHE system, we only need to configure the EL2 timer trap register once, + * not for every world switch. + * The host kernel runs at EL2 with HCR_EL2.TGE == 1, + * and this makes those bits have no effect for the host kernel execution. + */ +void kvm_timer_init_vhe(void) +{ + /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */ + u32 cnthctl_shift = 10; + u64 val; + + /* + * VHE systems allow the guest direct access to the EL1 physical + * timer/counter. + */ + val = read_sysreg(cnthctl_el2); + val |= (CNTHCTL_EL1PCEN << cnthctl_shift); + val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); + write_sysreg(val, cnthctl_el2); +} + +static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu_vtimer(vcpu)->irq.irq = vtimer_irq; + vcpu_ptimer(vcpu)->irq.irq = ptimer_irq; + } +} + +int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + int __user *uaddr = (int __user *)(long)attr->addr; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + int irq; + + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + + if (get_user(irq, uaddr)) + return -EFAULT; + + if (!(irq_is_ppi(irq))) + return -EINVAL; + + if (vcpu->arch.timer_cpu.enabled) + return -EBUSY; + + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq); + break; + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq); + break; + default: + return -ENXIO; + } + + return 0; +} + +int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + int __user *uaddr = (int __user *)(long)attr->addr; + struct arch_timer_context *timer; + int irq; + + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + timer = vcpu_vtimer(vcpu); + break; + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + timer = vcpu_ptimer(vcpu); + break; + default: + return -ENXIO; + } + + irq = timer->irq.irq; + return put_user(irq, uaddr); +} + +int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + return 0; + } + + return -ENXIO; +} diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c new file mode 100644 index 000000000000..7a57381c05e8 --- /dev/null +++ b/arch/arm64/kvm/arm.c @@ -0,0 +1,1710 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <linux/bug.h> +#include <linux/cpu_pm.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/mman.h> +#include <linux/sched.h> +#include <linux/kvm.h> +#include <linux/kvm_irqfd.h> +#include <linux/irqbypass.h> +#include <linux/sched/stat.h> +#include <trace/events/kvm.h> + +#define CREATE_TRACE_POINTS +#include "trace_arm.h" + +#include <linux/uaccess.h> +#include <asm/ptrace.h> +#include <asm/mman.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> +#include <asm/cpufeature.h> +#include <asm/virt.h> +#include 
<asm/kvm_arm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_coproc.h> +#include <asm/sections.h> + +#include <kvm/arm_hypercalls.h> +#include <kvm/arm_pmu.h> +#include <kvm/arm_psci.h> + +#ifdef REQUIRES_VIRT +__asm__(".arch_extension virt"); +#endif + +DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data); +static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); + +/* The VMID used in the VTTBR */ +static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); +static u32 kvm_next_vmid; +static DEFINE_SPINLOCK(kvm_vmid_lock); + +static bool vgic_present; + +static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); +DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); + +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; +} + +int kvm_arch_hardware_setup(void *opaque) +{ + return 0; +} + +int kvm_arch_check_processor_compat(void *opaque) +{ + return 0; +} + +int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_ARM_NISV_TO_USER: + r = 0; + kvm->arch.return_nisv_io_abort_to_user = true; + break; + default: + r = -EINVAL; + break; + } + + return r; +} + +static int kvm_arm_default_max_vcpus(void) +{ + return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; +} + +/** + * kvm_arch_init_vm - initializes a VM data structure + * @kvm: pointer to the KVM struct + */ +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + int ret, cpu; + + ret = kvm_arm_setup_stage2(kvm, type); + if (ret) + return ret; + + kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); + if (!kvm->arch.last_vcpu_ran) + return -ENOMEM; + + for_each_possible_cpu(cpu) + *per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1; + + ret = kvm_alloc_stage2_pgd(kvm); + if (ret) + goto out_fail_alloc; + + ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); + if (ret) + goto out_free_stage2_pgd; + + kvm_vgic_early_init(kvm); + + /* Mark the initial VMID generation invalid */ + kvm->arch.vmid.vmid_gen = 0; + + /* The maximum number of VCPUs is limited by the host's GIC model */ + kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); + + return ret; +out_free_stage2_pgd: + kvm_free_stage2_pgd(kvm); +out_fail_alloc: + free_percpu(kvm->arch.last_vcpu_ran); + kvm->arch.last_vcpu_ran = NULL; + return ret; +} + +int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +{ + return 0; +} + +vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + + +/** + * kvm_arch_destroy_vm - destroy the VM data structure + * @kvm: pointer to the KVM struct + */ +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + int i; + + kvm_vgic_destroy(kvm); + + free_percpu(kvm->arch.last_vcpu_ran); + kvm->arch.last_vcpu_ran = NULL; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_vcpu_destroy(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } + atomic_set(&kvm->online_vcpus, 0); +} + +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) +{ + int r; + switch (ext) { + case KVM_CAP_IRQCHIP: + r = vgic_present; + break; + case KVM_CAP_IOEVENTFD: + case KVM_CAP_DEVICE_CTRL: + case KVM_CAP_USER_MEMORY: + case KVM_CAP_SYNC_MMU: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_ONE_REG: + case KVM_CAP_ARM_PSCI: + case KVM_CAP_ARM_PSCI_0_2: + case KVM_CAP_READONLY_MEM: + case KVM_CAP_MP_STATE: + case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_VCPU_EVENTS: 
+ case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2: + case KVM_CAP_ARM_NISV_TO_USER: + case KVM_CAP_ARM_INJECT_EXT_DABT: + r = 1; + break; + case KVM_CAP_ARM_SET_DEVICE_ADDR: + r = 1; + break; + case KVM_CAP_NR_VCPUS: + r = num_online_cpus(); + break; + case KVM_CAP_MAX_VCPUS: + case KVM_CAP_MAX_VCPU_ID: + if (kvm) + r = kvm->arch.max_vcpus; + else + r = kvm_arm_default_max_vcpus(); + break; + case KVM_CAP_MSI_DEVID: + if (!kvm) + r = -EINVAL; + else + r = kvm->arch.vgic.msis_require_devid; + break; + case KVM_CAP_ARM_USER_IRQ: + /* + * 1: EL1_VTIMER, EL1_PTIMER, and PMU. + * (bump this number if adding more devices) + */ + r = 1; + break; + default: + r = kvm_arch_vm_ioctl_check_extension(kvm, ext); + break; + } + return r; +} + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +struct kvm *kvm_arch_alloc_vm(void) +{ + if (!has_vhe()) + return kzalloc(sizeof(struct kvm), GFP_KERNEL); + + return vzalloc(sizeof(struct kvm)); +} + +void kvm_arch_free_vm(struct kvm *kvm) +{ + if (!has_vhe()) + kfree(kvm); + else + vfree(kvm); +} + +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) +{ + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) + return -EBUSY; + + if (id >= kvm->arch.max_vcpus) + return -EINVAL; + + return 0; +} + +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) +{ + int err; + + /* Force users to call KVM_ARM_VCPU_INIT */ + vcpu->arch.target = -1; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + + /* Set up the timer */ + kvm_timer_vcpu_init(vcpu); + + kvm_pmu_vcpu_init(vcpu); + + kvm_arm_reset_debug_ptr(vcpu); + + kvm_arm_pvtime_vcpu_init(&vcpu->arch); + + err = kvm_vgic_vcpu_init(vcpu); + if (err) + return err; + + return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); +} + +void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm))) + static_branch_dec(&userspace_irqchip_in_use); + + kvm_mmu_free_memory_caches(vcpu); + kvm_timer_vcpu_terminate(vcpu); + kvm_pmu_vcpu_destroy(vcpu); + + kvm_arm_vcpu_destroy(vcpu); +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return kvm_timer_is_pending(vcpu); +} + +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) +{ + /* + * If we're about to block (most likely because we've just hit a + * WFI), we need to sync back the state of the GIC CPU interface + * so that we have the latest PMR and group enables. This ensures + * that kvm_arch_vcpu_runnable has up-to-date data to decide + * whether we have pending interrupts. + * + * For the same reason, we want to tell GICv4 that we need + * doorbells to be signalled, should an interrupt become pending. + */ + preempt_disable(); + kvm_vgic_vmcr_sync(vcpu); + vgic_v4_put(vcpu, true); + preempt_enable(); +} + +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + vgic_v4_load(vcpu); + preempt_enable(); +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + int *last_ran; + kvm_host_data_t *cpu_data; + + last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran); + cpu_data = this_cpu_ptr(&kvm_host_data); + + /* + * We might get preempted before the vCPU actually runs, but + * over-invalidation doesn't affect correctness. 
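Aside (editor's illustration, not part of the patch): the capability switch above is what backs userspace KVM_CHECK_EXTENSION queries. A minimal probe against the standard KVM ioctl ABI might look like the sketch below; error handling is trimmed and the two capabilities queried are only examples.

/* Illustrative userspace probe for capabilities handled above (sketch). */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
    int kvm = open("/dev/kvm", O_RDWR);
    if (kvm < 0) {
        perror("open /dev/kvm");
        return 1;
    }

    /* KVM_CHECK_EXTENSION returns 0 if absent, a positive value otherwise. */
    int irqchip   = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
    int max_vcpus = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);

    printf("in-kernel irqchip: %s\n", irqchip > 0 ? "yes" : "no");
    printf("max vcpus: %d\n", max_vcpus);
    return 0;
}

Queried on the system fd (kvm == NULL in the handler above), KVM_CAP_MAX_VCPUS reports the default limit; on a VM fd it reports that VM's own max_vcpus.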
+ */ + if (*last_ran != vcpu->vcpu_id) { + kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu); + *last_ran = vcpu->vcpu_id; + } + + vcpu->cpu = cpu; + vcpu->arch.host_cpu_context = &cpu_data->host_ctxt; + + kvm_vgic_load(vcpu); + kvm_timer_vcpu_load(vcpu); + kvm_vcpu_load_sysregs(vcpu); + kvm_arch_vcpu_load_fp(vcpu); + kvm_vcpu_pmu_restore_guest(vcpu); + if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); + + if (single_task_running()) + vcpu_clear_wfx_traps(vcpu); + else + vcpu_set_wfx_traps(vcpu); + + vcpu_ptrauth_setup_lazy(vcpu); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + kvm_arch_vcpu_put_fp(vcpu); + kvm_vcpu_put_sysregs(vcpu); + kvm_timer_vcpu_put(vcpu); + kvm_vgic_put(vcpu); + kvm_vcpu_pmu_restore_host(vcpu); + + vcpu->cpu = -1; +} + +static void vcpu_power_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_SLEEP, vcpu); + kvm_vcpu_kick(vcpu); +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + if (vcpu->arch.power_off) + mp_state->mp_state = KVM_MP_STATE_STOPPED; + else + mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + int ret = 0; + + switch (mp_state->mp_state) { + case KVM_MP_STATE_RUNNABLE: + vcpu->arch.power_off = false; + break; + case KVM_MP_STATE_STOPPED: + vcpu_power_off(vcpu); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/** + * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled + * @v: The VCPU pointer + * + * If the guest CPU is not waiting for interrupts or an interrupt line is + * asserted, the CPU is by definition runnable. + */ +int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) +{ + bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) + && !v->arch.power_off && !v->arch.pause); +} + +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return vcpu_mode_priv(vcpu); +} + +/* Just ensure a guest exit from a particular CPU */ +static void exit_vm_noop(void *info) +{ +} + +void force_vm_exit(const cpumask_t *mask) +{ + preempt_disable(); + smp_call_function_many(mask, exit_vm_noop, NULL, true); + preempt_enable(); +} + +/** + * need_new_vmid_gen - check that the VMID is still valid + * @vmid: The VMID to check + * + * return true if there is a new generation of VMIDs being used + * + * The hardware supports a limited set of values with the value zero reserved + * for the host, so we check if an assigned value belongs to a previous + * generation, which requires us to assign a new value. If we're the first to + * use a VMID for the new generation, we must flush necessary caches and TLBs + * on all CPUs. + */ +static bool need_new_vmid_gen(struct kvm_vmid *vmid) +{ + u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); + smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ + return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen); +} + +/** + * update_vmid - Update the vmid with a valid VMID for the current generation + * @kvm: The guest that struct vmid belongs to + * @vmid: The stage-2 VMID information struct + */ +static void update_vmid(struct kvm_vmid *vmid) +{ + if (!need_new_vmid_gen(vmid)) + return; + + spin_lock(&kvm_vmid_lock); + + /* + * We need to re-check the vmid_gen here to ensure that if another vcpu + * already allocated a valid vmid for this vm, then this vcpu should + * use the same vmid. 
+ */ + if (!need_new_vmid_gen(vmid)) { + spin_unlock(&kvm_vmid_lock); + return; + } + + /* First user of a new VMID generation? */ + if (unlikely(kvm_next_vmid == 0)) { + atomic64_inc(&kvm_vmid_gen); + kvm_next_vmid = 1; + + /* + * On SMP we know no other CPUs can use this CPU's or each + * other's VMID after force_vm_exit returns since the + * kvm_vmid_lock blocks them from reentry to the guest. + */ + force_vm_exit(cpu_all_mask); + /* + * Now broadcast TLB + ICACHE invalidation over the inner + * shareable domain to make sure all data structures are + * clean. + */ + kvm_call_hyp(__kvm_flush_vm_context); + } + + vmid->vmid = kvm_next_vmid; + kvm_next_vmid++; + kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1; + + smp_wmb(); + WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen)); + + spin_unlock(&kvm_vmid_lock); +} + +static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + int ret = 0; + + if (likely(vcpu->arch.has_run_once)) + return 0; + + if (!kvm_arm_vcpu_is_finalized(vcpu)) + return -EPERM; + + vcpu->arch.has_run_once = true; + + if (likely(irqchip_in_kernel(kvm))) { + /* + * Map the VGIC hardware resources before running a vcpu the + * first time on this VM. + */ + if (unlikely(!vgic_ready(kvm))) { + ret = kvm_vgic_map_resources(kvm); + if (ret) + return ret; + } + } else { + /* + * Tell the rest of the code that there are userspace irqchip + * VMs in the wild. + */ + static_branch_inc(&userspace_irqchip_in_use); + } + + ret = kvm_timer_enable(vcpu); + if (ret) + return ret; + + ret = kvm_arm_pmu_v3_enable(vcpu); + + return ret; +} + +bool kvm_arch_intc_initialized(struct kvm *kvm) +{ + return vgic_initialized(kvm); +} + +void kvm_arm_halt_guest(struct kvm *kvm) +{ + int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) + vcpu->arch.pause = true; + kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP); +} + +void kvm_arm_resume_guest(struct kvm *kvm) +{ + int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu->arch.pause = false; + rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu)); + } +} + +static void vcpu_req_sleep(struct kvm_vcpu *vcpu) +{ + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); + + rcuwait_wait_event(wait, + (!vcpu->arch.power_off) &&(!vcpu->arch.pause), + TASK_INTERRUPTIBLE); + + if (vcpu->arch.power_off || vcpu->arch.pause) { + /* Awaken to handle a signal, request we sleep again later. */ + kvm_make_request(KVM_REQ_SLEEP, vcpu); + } + + /* + * Make sure we will observe a potential reset request if we've + * observed a change to the power state. Pairs with the smp_wmb() in + * kvm_psci_vcpu_on(). + */ + smp_rmb(); +} + +static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.target >= 0; +} + +static void check_vcpu_requests(struct kvm_vcpu *vcpu) +{ + if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) + vcpu_req_sleep(vcpu); + + if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) + kvm_reset_vcpu(vcpu); + + /* + * Clear IRQ_PENDING requests that were made to guarantee + * that a VCPU sees new virtual interrupts. 
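Aside (editor's illustration, not part of the patch): update_vmid() above is a generation-counter allocator: VMIDs are handed out sequentially, and when the space wraps the global generation is bumped, forcing every VM to re-allocate after all guests have been kicked out and the TLBs flushed. The userspace model below sketches the same double-checked locking pattern with hypothetical names; it omits the atomics, barriers and TLB/icache maintenance the kernel needs.

/* Simplified model of generation-based VMID allocation (illustrative only). */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

#define VMID_BITS 8                     /* assume an 8-bit VMID space */

struct vmid {
    uint64_t gen;                       /* generation this VMID belongs to */
    uint32_t vmid;
};

static uint64_t global_gen = 1;
static uint32_t next_vmid = 1;          /* 0 stays reserved for the host */
static pthread_mutex_t vmid_lock = PTHREAD_MUTEX_INITIALIZER;

static bool vmid_is_stale(const struct vmid *v)
{
    return v->gen != global_gen;        /* kernel uses atomics + barriers here */
}

static void vmid_update(struct vmid *v)
{
    if (!vmid_is_stale(v))
        return;

    pthread_mutex_lock(&vmid_lock);
    if (vmid_is_stale(v)) {             /* re-check under the lock */
        if (next_vmid == 0) {           /* wrapped: open a new generation */
            global_gen++;
            next_vmid = 1;
            /* the kernel forces guest exits and flushes TLBs at this point */
        }
        v->vmid = next_vmid++;
        next_vmid &= (1u << VMID_BITS) - 1;
        v->gen = global_gen;
    }
    pthread_mutex_unlock(&vmid_lock);
}

int main(void)
{
    struct vmid a = { 0, 0 }, b = { 0, 0 };

    vmid_update(&a);
    vmid_update(&b);
    return a.vmid == b.vmid;            /* expect distinct IDs, i.e. exit 0 */
}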
+ */ + kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); + + if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) + kvm_update_stolen_time(vcpu); + + if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) { + /* The distributor enable bits were changed */ + preempt_disable(); + vgic_v4_put(vcpu, false); + vgic_v4_load(vcpu); + preempt_enable(); + } + } +} + +/** + * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code + * @vcpu: The VCPU pointer + * + * This function is called through the VCPU_RUN ioctl called from user space. It + * will execute VM code in a loop until the time slice for the process is used + * or some emulation is needed from user space in which case the function will + * return with return value 0 and with the kvm_run structure filled in with the + * required data for the requested emulation. + */ +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int ret; + + if (unlikely(!kvm_vcpu_initialized(vcpu))) + return -ENOEXEC; + + ret = kvm_vcpu_first_run_init(vcpu); + if (ret) + return ret; + + if (run->exit_reason == KVM_EXIT_MMIO) { + ret = kvm_handle_mmio_return(vcpu, run); + if (ret) + return ret; + } + + if (run->immediate_exit) + return -EINTR; + + vcpu_load(vcpu); + + kvm_sigset_activate(vcpu); + + ret = 1; + run->exit_reason = KVM_EXIT_UNKNOWN; + while (ret > 0) { + /* + * Check conditions before entering the guest + */ + cond_resched(); + + update_vmid(&vcpu->kvm->arch.vmid); + + check_vcpu_requests(vcpu); + + /* + * Preparing the interrupts to be injected also + * involves poking the GIC, which must be done in a + * non-preemptible context. + */ + preempt_disable(); + + kvm_pmu_flush_hwstate(vcpu); + + local_irq_disable(); + + kvm_vgic_flush_hwstate(vcpu); + + /* + * Exit if we have a signal pending so that we can deliver the + * signal to user space. + */ + if (signal_pending(current)) { + ret = -EINTR; + run->exit_reason = KVM_EXIT_INTR; + } + + /* + * If we're using a userspace irqchip, then check if we need + * to tell a userspace irqchip about timer or PMU level + * changes and if so, exit to userspace (the actual level + * state gets updated in kvm_timer_update_run and + * kvm_pmu_update_run below). + */ + if (static_branch_unlikely(&userspace_irqchip_in_use)) { + if (kvm_timer_should_notify_user(vcpu) || + kvm_pmu_should_notify_user(vcpu)) { + ret = -EINTR; + run->exit_reason = KVM_EXIT_INTR; + } + } + + /* + * Ensure we set mode to IN_GUEST_MODE after we disable + * interrupts and before the final VCPU requests check. 
+ * See the comment in kvm_vcpu_exiting_guest_mode() and + * Documentation/virt/kvm/vcpu-requests.rst + */ + smp_store_mb(vcpu->mode, IN_GUEST_MODE); + + if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) || + kvm_request_pending(vcpu)) { + vcpu->mode = OUTSIDE_GUEST_MODE; + isb(); /* Ensure work in x_flush_hwstate is committed */ + kvm_pmu_sync_hwstate(vcpu); + if (static_branch_unlikely(&userspace_irqchip_in_use)) + kvm_timer_sync_hwstate(vcpu); + kvm_vgic_sync_hwstate(vcpu); + local_irq_enable(); + preempt_enable(); + continue; + } + + kvm_arm_setup_debug(vcpu); + + /************************************************************** + * Enter the guest + */ + trace_kvm_entry(*vcpu_pc(vcpu)); + guest_enter_irqoff(); + + if (has_vhe()) { + ret = kvm_vcpu_run_vhe(vcpu); + } else { + ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); + } + + vcpu->mode = OUTSIDE_GUEST_MODE; + vcpu->stat.exits++; + /* + * Back from guest + *************************************************************/ + + kvm_arm_clear_debug(vcpu); + + /* + * We must sync the PMU state before the vgic state so + * that the vgic can properly sample the updated state of the + * interrupt line. + */ + kvm_pmu_sync_hwstate(vcpu); + + /* + * Sync the vgic state before syncing the timer state because + * the timer code needs to know if the virtual timer + * interrupts are active. + */ + kvm_vgic_sync_hwstate(vcpu); + + /* + * Sync the timer hardware state before enabling interrupts as + * we don't want vtimer interrupts to race with syncing the + * timer virtual interrupt state. + */ + if (static_branch_unlikely(&userspace_irqchip_in_use)) + kvm_timer_sync_hwstate(vcpu); + + kvm_arch_vcpu_ctxsync_fp(vcpu); + + /* + * We may have taken a host interrupt in HYP mode (ie + * while executing the guest). This interrupt is still + * pending, as we haven't serviced it yet! + * + * We're now back in SVC mode, with interrupts + * disabled. Enabling the interrupts now will have + * the effect of taking the interrupt again, in SVC + * mode this time. + */ + local_irq_enable(); + + /* + * We do local_irq_enable() before calling guest_exit() so + * that if a timer interrupt hits while running the guest we + * account that tick as being spent in the guest. We enable + * preemption after calling guest_exit() so that if we get + * preempted we make sure ticks after that is not counted as + * guest time. 
+ */ + guest_exit(); + trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + + /* Exit types that need handling before we can be preempted */ + handle_exit_early(vcpu, run, ret); + + preempt_enable(); + + ret = handle_exit(vcpu, run, ret); + } + + /* Tell userspace about in-kernel device output levels */ + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { + kvm_timer_update_run(vcpu); + kvm_pmu_update_run(vcpu); + } + + kvm_sigset_deactivate(vcpu); + + vcpu_put(vcpu); + return ret; +} + +static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) +{ + int bit_index; + bool set; + unsigned long *hcr; + + if (number == KVM_ARM_IRQ_CPU_IRQ) + bit_index = __ffs(HCR_VI); + else /* KVM_ARM_IRQ_CPU_FIQ */ + bit_index = __ffs(HCR_VF); + + hcr = vcpu_hcr(vcpu); + if (level) + set = test_and_set_bit(bit_index, hcr); + else + set = test_and_clear_bit(bit_index, hcr); + + /* + * If we didn't change anything, no need to wake up or kick other CPUs + */ + if (set == level) + return 0; + + /* + * The vcpu irq_lines field was updated, wake up sleeping VCPUs and + * trigger a world-switch round on the running physical CPU to set the + * virtual IRQ/FIQ fields in the HCR appropriately. + */ + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + + return 0; +} + +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, + bool line_status) +{ + u32 irq = irq_level->irq; + unsigned int irq_type, vcpu_idx, irq_num; + int nrcpus = atomic_read(&kvm->online_vcpus); + struct kvm_vcpu *vcpu = NULL; + bool level = irq_level->level; + + irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK; + vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; + vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1); + irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK; + + trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level); + + switch (irq_type) { + case KVM_ARM_IRQ_TYPE_CPU: + if (irqchip_in_kernel(kvm)) + return -ENXIO; + + if (vcpu_idx >= nrcpus) + return -EINVAL; + + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (!vcpu) + return -EINVAL; + + if (irq_num > KVM_ARM_IRQ_CPU_FIQ) + return -EINVAL; + + return vcpu_interrupt_line(vcpu, irq_num, level); + case KVM_ARM_IRQ_TYPE_PPI: + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + if (vcpu_idx >= nrcpus) + return -EINVAL; + + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (!vcpu) + return -EINVAL; + + if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) + return -EINVAL; + + return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL); + case KVM_ARM_IRQ_TYPE_SPI: + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + if (irq_num < VGIC_NR_PRIVATE_IRQS) + return -EINVAL; + + return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL); + } + + return -EINVAL; +} + +static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i, ret; + int phys_target = kvm_target_cpu(); + + if (init->target != phys_target) + return -EINVAL; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same target. + */ + if (vcpu->arch.target != -1 && vcpu->arch.target != init->target) + return -EINVAL; + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. 
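Aside (editor's illustration, not part of the patch): kvm_vm_ioctl_irq_line() above unpacks the single 32-bit value passed by KVM_IRQ_LINE into an irq type, a vcpu index (extended by the layout-2 VCPU2 field) and an irq number. The sketch below mirrors that unpacking; the shift/mask values are my reading of the arm64 UAPI layout and should be treated as illustrative rather than authoritative.

/* Illustrative decoder for the KVM_IRQ_LINE 'irq' field on arm64.
 * The constants below are this sketch's assumption of the UAPI layout,
 * not values taken from the patch text. */
#include <stdint.h>
#include <stdio.h>

#define IRQ_VCPU2_SHIFT 28
#define IRQ_VCPU2_MASK  0xfu
#define IRQ_TYPE_SHIFT  24
#define IRQ_TYPE_MASK   0xfu
#define IRQ_VCPU_SHIFT  16
#define IRQ_VCPU_MASK   0xffu
#define IRQ_NUM_SHIFT   0
#define IRQ_NUM_MASK    0xffffu

struct irq_line_fields {
    unsigned int type;                  /* CPU, SPI or PPI */
    unsigned int vcpu_idx;              /* extended by the VCPU2 field */
    unsigned int irq_num;
};

static struct irq_line_fields decode_irq_line(uint32_t irq)
{
    struct irq_line_fields f;

    f.type = (irq >> IRQ_TYPE_SHIFT) & IRQ_TYPE_MASK;
    f.vcpu_idx = (irq >> IRQ_VCPU_SHIFT) & IRQ_VCPU_MASK;
    f.vcpu_idx += ((irq >> IRQ_VCPU2_SHIFT) & IRQ_VCPU2_MASK) *
                  (IRQ_VCPU_MASK + 1);
    f.irq_num = (irq >> IRQ_NUM_SHIFT) & IRQ_NUM_MASK;
    return f;
}

int main(void)
{
    /* PPI 27 (virtual timer) on vcpu 3, assuming the PPI type encodes as 2. */
    uint32_t irq = (2u << IRQ_TYPE_SHIFT) | (3u << IRQ_VCPU_SHIFT) | 27u;
    struct irq_line_fields f = decode_irq_line(irq);

    printf("type=%u vcpu=%u irq=%u\n", f.type, f.vcpu_idx, f.irq_num);
    return 0;
}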
*/ + for (i = 0; i < sizeof(init->features) * 8; i++) { + bool set = (init->features[i / 32] & (1 << (i % 32))); + + if (set && i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same feature set. + */ + if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES && + test_bit(i, vcpu->arch.features) != set) + return -EINVAL; + + if (set) + set_bit(i, vcpu->arch.features); + } + + vcpu->arch.target = phys_target; + + /* Now we know what it is, we can reset it. */ + ret = kvm_reset_vcpu(vcpu); + if (ret) { + vcpu->arch.target = -1; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + } + + return ret; +} + +static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, + struct kvm_vcpu_init *init) +{ + int ret; + + ret = kvm_vcpu_set_target(vcpu, init); + if (ret) + return ret; + + /* + * Ensure a rebooted VM will fault in RAM pages and detect if the + * guest MMU is turned off and flush the caches as needed. + * + * S2FWB enforces all memory accesses to RAM being cacheable, we + * ensure that the cache is always coherent. + */ + if (vcpu->arch.has_run_once && !cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + stage2_unmap_vm(vcpu->kvm); + + vcpu_reset_hcr(vcpu); + + /* + * Handle the "start in power-off" case. + */ + if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + vcpu_power_off(vcpu); + else + vcpu->arch.power_off = false; + + return 0; +} + +static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + default: + ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr); + break; + } + + return ret; +} + +static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + default: + ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr); + break; + } + + return ret; +} + +static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + default: + ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr); + break; + } + + return ret; +} + +static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + memset(events, 0, sizeof(*events)); + + return __kvm_arm_vcpu_get_events(vcpu, events); +} + +static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + int i; + + /* check whether the reserved field is zero */ + for (i = 0; i < ARRAY_SIZE(events->reserved); i++) + if (events->reserved[i]) + return -EINVAL; + + /* check whether the pad field is zero */ + for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++) + if (events->exception.pad[i]) + return -EINVAL; + + return __kvm_arm_vcpu_set_events(vcpu, events); +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + struct kvm_device_attr attr; + long r; + + switch (ioctl) { + case KVM_ARM_VCPU_INIT: { + struct kvm_vcpu_init init; + + r = -EFAULT; + if (copy_from_user(&init, argp, sizeof(init))) + break; + + r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); + break; + } + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: { + struct kvm_one_reg reg; + + r = -ENOEXEC; + if (unlikely(!kvm_vcpu_initialized(vcpu))) + break; + + r = -EFAULT; + if (copy_from_user(®, argp, sizeof(reg))) + break; + + if (ioctl == KVM_SET_ONE_REG) + r = kvm_arm_set_reg(vcpu, ®); + else + r = 
kvm_arm_get_reg(vcpu, ®); + break; + } + case KVM_GET_REG_LIST: { + struct kvm_reg_list __user *user_list = argp; + struct kvm_reg_list reg_list; + unsigned n; + + r = -ENOEXEC; + if (unlikely(!kvm_vcpu_initialized(vcpu))) + break; + + r = -EPERM; + if (!kvm_arm_vcpu_is_finalized(vcpu)) + break; + + r = -EFAULT; + if (copy_from_user(®_list, user_list, sizeof(reg_list))) + break; + n = reg_list.n; + reg_list.n = kvm_arm_num_regs(vcpu); + if (copy_to_user(user_list, ®_list, sizeof(reg_list))) + break; + r = -E2BIG; + if (n < reg_list.n) + break; + r = kvm_arm_copy_reg_indices(vcpu, user_list->reg); + break; + } + case KVM_SET_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, argp, sizeof(attr))) + break; + r = kvm_arm_vcpu_set_attr(vcpu, &attr); + break; + } + case KVM_GET_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, argp, sizeof(attr))) + break; + r = kvm_arm_vcpu_get_attr(vcpu, &attr); + break; + } + case KVM_HAS_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, argp, sizeof(attr))) + break; + r = kvm_arm_vcpu_has_attr(vcpu, &attr); + break; + } + case KVM_GET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + if (kvm_arm_vcpu_get_events(vcpu, &events)) + return -EINVAL; + + if (copy_to_user(argp, &events, sizeof(events))) + return -EFAULT; + + return 0; + } + case KVM_SET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + if (copy_from_user(&events, argp, sizeof(events))) + return -EFAULT; + + return kvm_arm_vcpu_set_events(vcpu, &events); + } + case KVM_ARM_VCPU_FINALIZE: { + int what; + + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + + if (get_user(what, (const int __user *)argp)) + return -EFAULT; + + return kvm_arm_vcpu_finalize(vcpu, what); + } + default: + r = -EINVAL; + } + + return r; +} + +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ + +} + +void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + kvm_flush_remote_tlbs(kvm); +} + +static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, + struct kvm_arm_device_addr *dev_addr) +{ + unsigned long dev_id, type; + + dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >> + KVM_ARM_DEVICE_ID_SHIFT; + type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >> + KVM_ARM_DEVICE_TYPE_SHIFT; + + switch (dev_id) { + case KVM_ARM_DEVICE_VGIC_V2: + if (!vgic_present) + return -ENXIO; + return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); + default: + return -ENODEV; + } +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case KVM_CREATE_IRQCHIP: { + int ret; + if (!vgic_present) + return -ENXIO; + mutex_lock(&kvm->lock); + ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + mutex_unlock(&kvm->lock); + return ret; + } + case KVM_ARM_SET_DEVICE_ADDR: { + struct kvm_arm_device_addr dev_addr; + + if (copy_from_user(&dev_addr, argp, sizeof(dev_addr))) + return -EFAULT; + return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); + } + case KVM_ARM_PREFERRED_TARGET: { + int err; + struct kvm_vcpu_init init; + + err = kvm_vcpu_preferred_target(&init); + if (err) + return err; + + if (copy_to_user(argp, &init, sizeof(init))) + return -EFAULT; + + return 0; + } + default: + return -EINVAL; + } +} + +static void cpu_init_hyp_mode(void) +{ + phys_addr_t pgd_ptr; + unsigned long hyp_stack_ptr; + unsigned long vector_ptr; + unsigned long tpidr_el2; + + /* Switch from the HYP stub to our own HYP init vector */ 
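Aside (editor's illustration, not part of the patch): the KVM_GET_REG_LIST handler above follows the documented two-step protocol: userspace first calls with n = 0, the kernel writes back the real count and fails with E2BIG, and the caller retries with a large enough buffer. A hypothetical caller (vcpu_fd is assumed to be an already-initialized vCPU file descriptor):

/* Illustrative two-step KVM_GET_REG_LIST query (minimal error handling). */
#include <errno.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

struct kvm_reg_list *get_reg_list(int vcpu_fd)
{
    struct kvm_reg_list probe = { .n = 0 };
    struct kvm_reg_list *list;

    /* First call: deliberately too small; the kernel writes back the count. */
    if (ioctl(vcpu_fd, KVM_GET_REG_LIST, &probe) == 0 || errno != E2BIG)
        return NULL;

    list = calloc(1, sizeof(*list) + probe.n * sizeof(__u64));
    if (!list)
        return NULL;
    list->n = probe.n;

    /* Second call: the register indices now fit in list->reg[]. */
    if (ioctl(vcpu_fd, KVM_GET_REG_LIST, list)) {
        free(list);
        return NULL;
    }
    return list;
}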
+ __hyp_set_vectors(kvm_get_idmap_vector()); + + /* + * Calculate the raw per-cpu offset without a translation from the + * kernel's mapping to the linear mapping, and store it in tpidr_el2 + * so that we can use adr_l to access per-cpu variables in EL2. + */ + tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) - + (unsigned long)kvm_ksym_ref(kvm_host_data)); + + pgd_ptr = kvm_mmu_get_httbr(); + hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; + vector_ptr = (unsigned long)kvm_get_hyp_vector(); + + /* + * Call initialization code, and switch to the full blown HYP code. + * If the cpucaps haven't been finalized yet, something has gone very + * wrong, and hyp will crash and burn when it uses any + * cpus_have_const_cap() wrapper. + */ + BUG_ON(!system_capabilities_finalized()); + __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); + + /* + * Disabling SSBD on a non-VHE system requires us to enable SSBS + * at EL2. + */ + if (this_cpu_has_cap(ARM64_SSBS) && + arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + kvm_call_hyp(__kvm_enable_ssbs); + } +} + +static void cpu_hyp_reset(void) +{ + if (!is_kernel_in_hyp_mode()) + __hyp_reset_vectors(); +} + +static void cpu_hyp_reinit(void) +{ + kvm_init_host_cpu_context(&this_cpu_ptr(&kvm_host_data)->host_ctxt); + + cpu_hyp_reset(); + + if (is_kernel_in_hyp_mode()) + kvm_timer_init_vhe(); + else + cpu_init_hyp_mode(); + + kvm_arm_init_debug(); + + if (vgic_present) + kvm_vgic_init_cpu_hardware(); +} + +static void _kvm_arch_hardware_enable(void *discard) +{ + if (!__this_cpu_read(kvm_arm_hardware_enabled)) { + cpu_hyp_reinit(); + __this_cpu_write(kvm_arm_hardware_enabled, 1); + } +} + +int kvm_arch_hardware_enable(void) +{ + _kvm_arch_hardware_enable(NULL); + return 0; +} + +static void _kvm_arch_hardware_disable(void *discard) +{ + if (__this_cpu_read(kvm_arm_hardware_enabled)) { + cpu_hyp_reset(); + __this_cpu_write(kvm_arm_hardware_enabled, 0); + } +} + +void kvm_arch_hardware_disable(void) +{ + _kvm_arch_hardware_disable(NULL); +} + +#ifdef CONFIG_CPU_PM +static int hyp_init_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, + void *v) +{ + /* + * kvm_arm_hardware_enabled is left with its old value over + * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should + * re-enable hyp. + */ + switch (cmd) { + case CPU_PM_ENTER: + if (__this_cpu_read(kvm_arm_hardware_enabled)) + /* + * don't update kvm_arm_hardware_enabled here + * so that the hardware will be re-enabled + * when we resume. See below. + */ + cpu_hyp_reset(); + + return NOTIFY_OK; + case CPU_PM_ENTER_FAILED: + case CPU_PM_EXIT: + if (__this_cpu_read(kvm_arm_hardware_enabled)) + /* The hardware was enabled before suspend. */ + cpu_hyp_reinit(); + + return NOTIFY_OK; + + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block hyp_init_cpu_pm_nb = { + .notifier_call = hyp_init_cpu_pm_notifier, +}; + +static void __init hyp_cpu_pm_init(void) +{ + cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); +} +static void __init hyp_cpu_pm_exit(void) +{ + cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); +} +#else +static inline void hyp_cpu_pm_init(void) +{ +} +static inline void hyp_cpu_pm_exit(void) +{ +} +#endif + +static int init_common_resources(void) +{ + return kvm_set_ipa_limit(); +} + +static int init_subsystems(void) +{ + int err = 0; + + /* + * Enable hardware so that subsystem initialisation can access EL2. 
+ */ + on_each_cpu(_kvm_arch_hardware_enable, NULL, 1); + + /* + * Register CPU lower-power notifier + */ + hyp_cpu_pm_init(); + + /* + * Init HYP view of VGIC + */ + err = kvm_vgic_hyp_init(); + switch (err) { + case 0: + vgic_present = true; + break; + case -ENODEV: + case -ENXIO: + vgic_present = false; + err = 0; + break; + default: + goto out; + } + + /* + * Init HYP architected timer support + */ + err = kvm_timer_hyp_init(vgic_present); + if (err) + goto out; + + kvm_perf_init(); + kvm_coproc_table_init(); + +out: + on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); + + return err; +} + +static void teardown_hyp_mode(void) +{ + int cpu; + + free_hyp_pgds(); + for_each_possible_cpu(cpu) + free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); +} + +/** + * Inits Hyp-mode on all online CPUs + */ +static int init_hyp_mode(void) +{ + int cpu; + int err = 0; + + /* + * Allocate Hyp PGD and setup Hyp identity mapping + */ + err = kvm_mmu_init(); + if (err) + goto out_err; + + /* + * Allocate stack pages for Hypervisor-mode + */ + for_each_possible_cpu(cpu) { + unsigned long stack_page; + + stack_page = __get_free_page(GFP_KERNEL); + if (!stack_page) { + err = -ENOMEM; + goto out_err; + } + + per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page; + } + + /* + * Map the Hyp-code called directly from the host + */ + err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start), + kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC); + if (err) { + kvm_err("Cannot map world-switch code\n"); + goto out_err; + } + + err = create_hyp_mappings(kvm_ksym_ref(__start_rodata), + kvm_ksym_ref(__end_rodata), PAGE_HYP_RO); + if (err) { + kvm_err("Cannot map rodata section\n"); + goto out_err; + } + + err = create_hyp_mappings(kvm_ksym_ref(__bss_start), + kvm_ksym_ref(__bss_stop), PAGE_HYP_RO); + if (err) { + kvm_err("Cannot map bss section\n"); + goto out_err; + } + + err = kvm_map_vectors(); + if (err) { + kvm_err("Cannot map vectors\n"); + goto out_err; + } + + /* + * Map the Hyp stack pages + */ + for_each_possible_cpu(cpu) { + char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu); + err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE, + PAGE_HYP); + + if (err) { + kvm_err("Cannot map hyp stack\n"); + goto out_err; + } + } + + for_each_possible_cpu(cpu) { + kvm_host_data_t *cpu_data; + + cpu_data = per_cpu_ptr(&kvm_host_data, cpu); + err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP); + + if (err) { + kvm_err("Cannot map host CPU state: %d\n", err); + goto out_err; + } + } + + err = hyp_map_aux_data(); + if (err) + kvm_err("Cannot map host auxiliary data: %d\n", err); + + return 0; + +out_err: + teardown_hyp_mode(); + kvm_err("error initializing Hyp mode: %d\n", err); + return err; +} + +static void check_kvm_target_cpu(void *ret) +{ + *(int *)ret = kvm_target_cpu(); +} + +struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) +{ + struct kvm_vcpu *vcpu; + int i; + + mpidr &= MPIDR_HWID_BITMASK; + kvm_for_each_vcpu(i, vcpu, kvm) { + if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu)) + return vcpu; + } + return NULL; +} + +bool kvm_arch_has_irq_bypass(void) +{ + return true; +} + +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq, + &irqfd->irq_entry); +} +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct 
kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq, + &irqfd->irq_entry); +} + +void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + kvm_arm_halt_guest(irqfd->kvm); +} + +void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + kvm_arm_resume_guest(irqfd->kvm); +} + +/** + * Initialize Hyp-mode and memory mappings on all CPUs. + */ +int kvm_arch_init(void *opaque) +{ + int err; + int ret, cpu; + bool in_hyp_mode; + + if (!is_hyp_mode_available()) { + kvm_info("HYP mode not available\n"); + return -ENODEV; + } + + in_hyp_mode = is_kernel_in_hyp_mode(); + + if (!in_hyp_mode && kvm_arch_requires_vhe()) { + kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n"); + return -ENODEV; + } + + for_each_online_cpu(cpu) { + smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); + if (ret < 0) { + kvm_err("Error, CPU %d not supported!\n", cpu); + return -ENODEV; + } + } + + err = init_common_resources(); + if (err) + return err; + + err = kvm_arm_init_sve(); + if (err) + return err; + + if (!in_hyp_mode) { + err = init_hyp_mode(); + if (err) + goto out_err; + } + + err = init_subsystems(); + if (err) + goto out_hyp; + + if (in_hyp_mode) + kvm_info("VHE mode initialized successfully\n"); + else + kvm_info("Hyp mode initialized successfully\n"); + + return 0; + +out_hyp: + hyp_cpu_pm_exit(); + if (!in_hyp_mode) + teardown_hyp_mode(); +out_err: + return err; +} + +/* NOP: Compiling as a module not supported */ +void kvm_arch_exit(void) +{ + kvm_perf_teardown(); +} + +static int arm_init(void) +{ + int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + return rc; +} + +module_init(arm_init); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 50a279d3ddd7..aea43ec60f37 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -29,20 +29,19 @@ #include "trace.h" -#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM } -#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } - struct kvm_stats_debugfs_item debugfs_entries[] = { - VCPU_STAT(halt_successful_poll), - VCPU_STAT(halt_attempted_poll), - VCPU_STAT(halt_poll_invalid), - VCPU_STAT(halt_wakeup), - VCPU_STAT(hvc_exit_stat), - VCPU_STAT(wfe_exit_stat), - VCPU_STAT(wfi_exit_stat), - VCPU_STAT(mmio_exit_user), - VCPU_STAT(mmio_exit_kernel), - VCPU_STAT(exits), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("hvc_exit_stat", hvc_exit_stat), + VCPU_STAT("wfe_exit_stat", wfe_exit_stat), + VCPU_STAT("wfi_exit_stat", wfi_exit_stat), + VCPU_STAT("mmio_exit_user", mmio_exit_user), + VCPU_STAT("mmio_exit_kernel", mmio_exit_kernel), + VCPU_STAT("exits", exits), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), { NULL } }; @@ -267,7 +266,7 @@ static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) /* * Vector lengths supported by the host can't currently be * hidden from the guest individually: instead we can only set a - * maxmium via ZCR_EL2.LEN. So, make sure the available vector + * maximum via ZCR_EL2.LEN. 
So, make sure the available vector * lengths match the set requested exactly up to the requested * maximum: */ @@ -337,7 +336,7 @@ static int sve_reg_to_region(struct sve_state_reg_region *region, unsigned int reg_num; unsigned int reqoffset, reqlen; /* User-requested offset and length */ - unsigned int maxlen; /* Maxmimum permitted length */ + unsigned int maxlen; /* Maximum permitted length */ size_t sve_state_size; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index aacfc55de44c..eb194696ef62 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -23,7 +23,7 @@ #include <kvm/arm_hypercalls.h> #define CREATE_TRACE_POINTS -#include "trace.h" +#include "trace_handle_exit.h" typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index ea710f674cb6..8c9880783839 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -6,20 +6,10 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -KVM=../../../../virt/kvm +obj-$(CONFIG_KVM) += hyp.o -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o - -obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o -obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += entry.o -obj-$(CONFIG_KVM_ARM_HOST) += switch.o -obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o -obj-$(CONFIG_KVM_ARM_HOST) += tlb.o -obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o +hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ + debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c new file mode 100644 index 000000000000..25c0e47d57cb --- /dev/null +++ b/arch/arm64/kvm/hyp/aarch32.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyp portion of the (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +/* + * stolen from arch/arm/kernel/opcodes.c + * + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +/* + * Check if a trapped instruction should have been executed or not. + */ +bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) +{ + unsigned long cpsr; + u32 cpsr_cond; + int cond; + + /* Top two bits non-zero? Unconditional. 
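Aside (editor's illustration, not part of the patch): cc_map above stores, for each of the 16 AArch32 condition codes, a 16-bit truth table indexed by the NZCV nibble taken from CPSR[31:28]. The standalone model below reproduces that lookup so it can be sanity-checked outside the hypervisor; the helper names are mine.

/* Standalone model of the cc_map condition check (illustrative). */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Same table as the patch: bit n of cc_map[cond] is set when condition
 * 'cond' passes for NZCV == n. */
static const uint16_t cc_map[16] = {
    0xF0F0, /* EQ */  0x0F0F, /* NE */
    0xCCCC, /* CS */  0x3333, /* CC */
    0xFF00, /* MI */  0x00FF, /* PL */
    0xAAAA, /* VS */  0x5555, /* VC */
    0x0C0C, /* HI */  0xF3F3, /* LS */
    0xAA55, /* GE */  0x55AA, /* LT */
    0x0A05, /* GT */  0xF5FA, /* LE */
    0xFFFF, /* AL */  0x0000, /* NV */
};

static bool cond_passes(unsigned int cond, unsigned int nzcv)
{
    return (cc_map[cond & 0xf] >> (nzcv & 0xf)) & 1;
}

int main(void)
{
    /* EQ (cond 0) passes exactly when Z (bit 2 of NZCV) is set. */
    assert(cond_passes(0, 0x4));
    assert(!cond_passes(0, 0x0));
    /* AL (cond 14) always passes. */
    assert(cond_passes(14, 0x9));
    return 0;
}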
*/ + if (kvm_vcpu_get_hsr(vcpu) >> 30) + return true; + + /* Is condition field valid? */ + cond = kvm_vcpu_get_condition(vcpu); + if (cond == 0xE) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + if (cond < 0) { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); + + /* it == 0 => unconditional. */ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. */ + cond = (it >> 4); + } + + cpsr_cond = cpsr >> 28; + + if (!((cc_map[cond] >> cpsr_cond) & 1)) + return false; + + return true; +} + +/** + * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block + * @vcpu: The VCPU pointer + * + * When exceptions occur while instructions are executed in Thumb IF-THEN + * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have + * to do this little bit of work manually. The fields map like this: + * + * IT[7:0] -> CPSR[26:25],CPSR[15:10] + */ +static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) +{ + unsigned long itbits, cond; + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_arm = !(cpsr & PSR_AA32_T_BIT); + + if (is_arm || !(cpsr & PSR_AA32_IT_MASK)) + return; + + cond = (cpsr & 0xe000) >> 13; + itbits = (cpsr & 0x1c00) >> (10 - 2); + itbits |= (cpsr & (0x3 << 25)) >> 25; + + /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ + if ((itbits & 0x7) == 0) + itbits = cond = 0; + else + itbits = (itbits << 1) & 0x1f; + + cpsr &= ~PSR_AA32_IT_MASK; + cpsr |= cond << 13; + cpsr |= (itbits & 0x1c) << (10 - 2); + cpsr |= (itbits & 0x3) << 25; + *vcpu_cpsr(vcpu) = cpsr; +} + +/** + * kvm_skip_instr - skip a trapped instruction and proceed to the next + * @vcpu: The vcpu pointer + */ +void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + u32 pc = *vcpu_pc(vcpu); + bool is_thumb; + + is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); + if (is_thumb && !is_wide_instr) + pc += 2; + else + pc += 4; + + *vcpu_pc(vcpu) = pc; + + kvm_adjust_itstate(vcpu); +} diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 1336e6f0acdf..676b6585e5ae 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -270,8 +270,8 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_save_state(vcpu); - __vgic_v3_deactivate_traps(vcpu); + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); } } @@ -279,8 +279,8 @@ static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_activate_traps(vcpu); - __vgic_v3_restore_state(vcpu); + __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); } } diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c new file mode 100644 index 000000000000..fb5c0be33223 --- /dev/null +++ b/arch/arm64/kvm/hyp/timer-sr.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <clocksource/arm_arch_timer.h> +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_hyp.h> + +void __hyp_text 
__kvm_timer_set_cntvoff(u64 cntvoff) +{ + write_sysreg(cntvoff, cntvoff_el2); +} + +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ +void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); +} + +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ +void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); +} diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c new file mode 100644 index 000000000000..10ed539835c1 --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -0,0 +1,1113 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/compiler.h> +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +#define vtr_to_max_lr_idx(v) ((v) & 0xf) +#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) +#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) + +static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) +{ + switch (lr & 0xf) { + case 0: + return read_gicreg(ICH_LR0_EL2); + case 1: + return read_gicreg(ICH_LR1_EL2); + case 2: + return read_gicreg(ICH_LR2_EL2); + case 3: + return read_gicreg(ICH_LR3_EL2); + case 4: + return read_gicreg(ICH_LR4_EL2); + case 5: + return read_gicreg(ICH_LR5_EL2); + case 6: + return read_gicreg(ICH_LR6_EL2); + case 7: + return read_gicreg(ICH_LR7_EL2); + case 8: + return read_gicreg(ICH_LR8_EL2); + case 9: + return read_gicreg(ICH_LR9_EL2); + case 10: + return read_gicreg(ICH_LR10_EL2); + case 11: + return read_gicreg(ICH_LR11_EL2); + case 12: + return read_gicreg(ICH_LR12_EL2); + case 13: + return read_gicreg(ICH_LR13_EL2); + case 14: + return read_gicreg(ICH_LR14_EL2); + case 15: + return read_gicreg(ICH_LR15_EL2); + } + + unreachable(); +} + +static void __hyp_text __gic_v3_set_lr(u64 val, int lr) +{ + switch (lr & 0xf) { + case 0: + write_gicreg(val, ICH_LR0_EL2); + break; + case 1: + write_gicreg(val, ICH_LR1_EL2); + break; + case 2: + write_gicreg(val, ICH_LR2_EL2); + break; + case 3: + write_gicreg(val, ICH_LR3_EL2); + break; + case 4: + write_gicreg(val, ICH_LR4_EL2); + break; + case 5: + write_gicreg(val, ICH_LR5_EL2); + break; + case 6: + write_gicreg(val, ICH_LR6_EL2); + break; + case 7: + write_gicreg(val, ICH_LR7_EL2); + break; + case 8: + write_gicreg(val, ICH_LR8_EL2); + break; + case 9: + write_gicreg(val, ICH_LR9_EL2); + break; + case 10: + write_gicreg(val, ICH_LR10_EL2); + break; + case 11: + write_gicreg(val, ICH_LR11_EL2); + break; + case 12: + write_gicreg(val, ICH_LR12_EL2); + break; + case 13: + write_gicreg(val, ICH_LR13_EL2); + break; + case 14: + write_gicreg(val, ICH_LR14_EL2); + break; + case 15: + write_gicreg(val, ICH_LR15_EL2); + break; + } +} + +static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) +{ + switch (n) { + case 0: + write_gicreg(val, ICH_AP0R0_EL2); + break; + case 1: + 
write_gicreg(val, ICH_AP0R1_EL2); + break; + case 2: + write_gicreg(val, ICH_AP0R2_EL2); + break; + case 3: + write_gicreg(val, ICH_AP0R3_EL2); + break; + } +} + +static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) +{ + switch (n) { + case 0: + write_gicreg(val, ICH_AP1R0_EL2); + break; + case 1: + write_gicreg(val, ICH_AP1R1_EL2); + break; + case 2: + write_gicreg(val, ICH_AP1R2_EL2); + break; + case 3: + write_gicreg(val, ICH_AP1R3_EL2); + break; + } +} + +static u32 __hyp_text __vgic_v3_read_ap0rn(int n) +{ + u32 val; + + switch (n) { + case 0: + val = read_gicreg(ICH_AP0R0_EL2); + break; + case 1: + val = read_gicreg(ICH_AP0R1_EL2); + break; + case 2: + val = read_gicreg(ICH_AP0R2_EL2); + break; + case 3: + val = read_gicreg(ICH_AP0R3_EL2); + break; + default: + unreachable(); + } + + return val; +} + +static u32 __hyp_text __vgic_v3_read_ap1rn(int n) +{ + u32 val; + + switch (n) { + case 0: + val = read_gicreg(ICH_AP1R0_EL2); + break; + case 1: + val = read_gicreg(ICH_AP1R1_EL2); + break; + case 2: + val = read_gicreg(ICH_AP1R2_EL2); + break; + case 3: + val = read_gicreg(ICH_AP1R3_EL2); + break; + default: + unreachable(); + } + + return val; +} + +void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) +{ + u64 used_lrs = cpu_if->used_lrs; + + /* + * Make sure stores to the GIC via the memory mapped interface + * are now visible to the system register interface when reading the + * LRs, and when reading back the VMCR on non-VHE systems. + */ + if (used_lrs || !has_vhe()) { + if (!cpu_if->vgic_sre) { + dsb(sy); + isb(); + } + } + + if (used_lrs || cpu_if->its_vpe.its_vm) { + int i; + u32 elrsr; + + elrsr = read_gicreg(ICH_ELRSR_EL2); + + write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); + + for (i = 0; i < used_lrs; i++) { + if (elrsr & (1 << i)) + cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; + else + cpu_if->vgic_lr[i] = __gic_v3_get_lr(i); + + __gic_v3_set_lr(0, i); + } + } +} + +void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) +{ + u64 used_lrs = cpu_if->used_lrs; + int i; + + if (used_lrs || cpu_if->its_vpe.its_vm) { + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + + for (i = 0; i < used_lrs; i++) + __gic_v3_set_lr(cpu_if->vgic_lr[i], i); + } + + /* + * Ensure that writes to the LRs, and on non-VHE systems ensure that + * the write to the VMCR in __vgic_v3_activate_traps(), will have + * reached the (re)distributors. This ensure the guest will read the + * correct values from the memory-mapped interface. + */ + if (used_lrs || !has_vhe()) { + if (!cpu_if->vgic_sre) { + isb(); + dsb(sy); + } + } +} + +void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) +{ + /* + * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a + * Group0 interrupt (as generated in GICv2 mode) to be + * delivered as a FIQ to the guest, with potentially fatal + * consequences. So we must make sure that ICC_SRE_EL1 has + * been actually programmed with the value we want before + * starting to mess with the rest of the GIC, and VMCR_EL2 in + * particular. This logic must be called before + * __vgic_v3_restore_state(). + */ + if (!cpu_if->vgic_sre) { + write_gicreg(0, ICC_SRE_EL1); + isb(); + write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); + + + if (has_vhe()) { + /* + * Ensure that the write to the VMCR will have reached + * the (re)distributors. This ensure the guest will + * read the correct values from the memory-mapped + * interface. 
+ */ + isb(); + dsb(sy); + } + } + + /* + * Prevent the guest from touching the GIC system registers if + * SRE isn't enabled for GICv3 emulation. + */ + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); + + /* + * If we need to trap system registers, we must write + * ICH_HCR_EL2 anyway, even if no interrupts are being + * injected, + */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); +} + +void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) +{ + u64 val; + + if (!cpu_if->vgic_sre) { + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + } + + val = read_gicreg(ICC_SRE_EL2); + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + + if (!cpu_if->vgic_sre) { + /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ + isb(); + write_gicreg(1, ICC_SRE_EL1); + } + + /* + * If we were trapping system registers, we enabled the VGIC even if + * no interrupts were being injected, and we disable it again here. + */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) + write_gicreg(0, ICH_HCR_EL2); +} + +void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + u64 val; + u32 nr_pre_bits; + + val = read_gicreg(ICH_VTR_EL2); + nr_pre_bits = vtr_to_nr_pre_bits(val); + + switch (nr_pre_bits) { + case 7: + cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); + cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); + /* Fall through */ + case 6: + cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); + /* Fall through */ + default: + cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); + } + + switch (nr_pre_bits) { + case 7: + cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); + cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); + /* Fall through */ + case 6: + cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); + /* Fall through */ + default: + cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); + } +} + +void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + u64 val; + u32 nr_pre_bits; + + val = read_gicreg(ICH_VTR_EL2); + nr_pre_bits = vtr_to_nr_pre_bits(val); + + switch (nr_pre_bits) { + case 7: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); + /* Fall through */ + case 6: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); + /* Fall through */ + default: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); + } + + switch (nr_pre_bits) { + case 7: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); + /* Fall through */ + case 6: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); + /* Fall through */ + default: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); + } +} + +void __hyp_text __vgic_v3_init_lrs(void) +{ + int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); + int i; + + for (i = 0; i <= max_lr_idx; i++) + __gic_v3_set_lr(0, i); +} + +u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void) +{ + return read_gicreg(ICH_VTR_EL2); +} + +u64 __hyp_text __vgic_v3_read_vmcr(void) +{ + return read_gicreg(ICH_VMCR_EL2); +} + +void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) +{ + write_gicreg(vmcr, ICH_VMCR_EL2); +} + +static int __hyp_text __vgic_v3_bpr_min(void) +{ + /* See Pseudocode for VPriorityGroup */ + return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); +} + +static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_hsr(vcpu); + u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; + + 
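The number of AP0R/AP1R registers that the save/restore switches above touch, and the minimum BPR enforced further down, both derive from the PREbits field of ICH_VTR_EL2. A standalone sketch of that arithmetic, using a made-up VTR value instead of a real register read:

#include <stdint.h>
#include <stdio.h>

/* Same shifts as vtr_to_nr_pre_bits()/vtr_to_nr_apr_regs() above. */
static unsigned nr_pre_bits(uint32_t vtr) { return ((vtr >> 26) & 7) + 1; }
static unsigned nr_apr_regs(uint32_t vtr) { return 1u << (nr_pre_bits(vtr) - 5); }
static unsigned bpr_min(uint32_t vtr)     { return 8 - nr_pre_bits(vtr); }

int main(void)
{
    uint32_t vtr = 6u << 26;   /* PREbits field = 6 -> 7 preemption bits */

    printf("pre bits: %u, APR regs per group: %u, min BPR: %u\n",
           nr_pre_bits(vtr), nr_apr_regs(vtr), bpr_min(vtr));
    return 0;
}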
return crm != 8; +} + +#define GICv3_IDLE_PRIORITY 0xff + +static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, + u32 vmcr, + u64 *lr_val) +{ + unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; + u8 priority = GICv3_IDLE_PRIORITY; + int i, lr = -1; + + for (i = 0; i < used_lrs; i++) { + u64 val = __gic_v3_get_lr(i); + u8 lr_prio = (val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + + /* Not pending in the state? */ + if ((val & ICH_LR_STATE) != ICH_LR_PENDING_BIT) + continue; + + /* Group-0 interrupt, but Group-0 disabled? */ + if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK)) + continue; + + /* Group-1 interrupt, but Group-1 disabled? */ + if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK)) + continue; + + /* Not the highest priority? */ + if (lr_prio >= priority) + continue; + + /* This is a candidate */ + priority = lr_prio; + *lr_val = val; + lr = i; + } + + if (lr == -1) + *lr_val = ICC_IAR1_EL1_SPURIOUS; + + return lr; +} + +static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, + int intid, u64 *lr_val) +{ + unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; + int i; + + for (i = 0; i < used_lrs; i++) { + u64 val = __gic_v3_get_lr(i); + + if ((val & ICH_LR_VIRTUAL_ID_MASK) == intid && + (val & ICH_LR_ACTIVE_BIT)) { + *lr_val = val; + return i; + } + } + + *lr_val = ICC_IAR1_EL1_SPURIOUS; + return -1; +} + +static int __hyp_text __vgic_v3_get_highest_active_priority(void) +{ + u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); + u32 hap = 0; + int i; + + for (i = 0; i < nr_apr_regs; i++) { + u32 val; + + /* + * The ICH_AP0Rn_EL2 and ICH_AP1Rn_EL2 registers + * contain the active priority levels for this VCPU + * for the maximum number of supported priority + * levels, and we return the full priority level only + * if the BPR is programmed to its minimum, otherwise + * we return a combination of the priority level and + * subpriority, as determined by the setting of the + * BPR, but without the full subpriority. + */ + val = __vgic_v3_read_ap0rn(i); + val |= __vgic_v3_read_ap1rn(i); + if (!val) { + hap += 32; + continue; + } + + return (hap + __ffs(val)) << __vgic_v3_bpr_min(); + } + + return GICv3_IDLE_PRIORITY; +} + +static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr) +{ + return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; +} + +static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) +{ + unsigned int bpr; + + if (vmcr & ICH_VMCR_CBPR_MASK) { + bpr = __vgic_v3_get_bpr0(vmcr); + if (bpr < 7) + bpr++; + } else { + bpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + } + + return bpr; +} + +/* + * Convert a priority to a preemption level, taking the relevant BPR + * into account by zeroing the sub-priority bits. + */ +static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) +{ + unsigned int bpr; + + if (!grp) + bpr = __vgic_v3_get_bpr0(vmcr) + 1; + else + bpr = __vgic_v3_get_bpr1(vmcr); + + return pri & (GENMASK(7, 0) << bpr); +} + +/* + * The priority value is independent of any of the BPR values, so we + * normalize it using the minimal BPR value. This guarantees that no + * matter what the guest does with its BPR, we can always set/get the + * same value of a priority. 
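__vgic_v3_pri_to_pre() above reduces an 8-bit priority to its preemption group by zeroing the sub-priority bits selected by the relevant BPR. A small userspace illustration of that masking (the sample priorities and BPR value are invented for the demo):

#include <stdint.h>
#include <stdio.h>

/* Zero the low 'bpr' bits of an 8-bit priority, as pri_to_pre() does. */
static uint8_t pri_to_pre(uint8_t pri, unsigned bpr)
{
    return pri & (0xffu << bpr);
}

int main(void)
{
    /* With BPR=3, priorities 0x40..0x47 all collapse to preemption level 0x40. */
    for (uint8_t pri = 0x40; pri < 0x48; pri++)
        printf("pri %#04x -> preemption level %#04x\n", pri, pri_to_pre(pri, 3));
    return 0;
}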
+ */ +static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) +{ + u8 pre, ap; + u32 val; + int apr; + + pre = __vgic_v3_pri_to_pre(pri, vmcr, grp); + ap = pre >> __vgic_v3_bpr_min(); + apr = ap / 32; + + if (!grp) { + val = __vgic_v3_read_ap0rn(apr); + __vgic_v3_write_ap0rn(val | BIT(ap % 32), apr); + } else { + val = __vgic_v3_read_ap1rn(apr); + __vgic_v3_write_ap1rn(val | BIT(ap % 32), apr); + } +} + +static int __hyp_text __vgic_v3_clear_highest_active_priority(void) +{ + u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); + u32 hap = 0; + int i; + + for (i = 0; i < nr_apr_regs; i++) { + u32 ap0, ap1; + int c0, c1; + + ap0 = __vgic_v3_read_ap0rn(i); + ap1 = __vgic_v3_read_ap1rn(i); + if (!ap0 && !ap1) { + hap += 32; + continue; + } + + c0 = ap0 ? __ffs(ap0) : 32; + c1 = ap1 ? __ffs(ap1) : 32; + + /* Always clear the LSB, which is the highest priority */ + if (c0 < c1) { + ap0 &= ~BIT(c0); + __vgic_v3_write_ap0rn(ap0, i); + hap += c0; + } else { + ap1 &= ~BIT(c1); + __vgic_v3_write_ap1rn(ap1, i); + hap += c1; + } + + /* Rescale to 8 bits of priority */ + return hap << __vgic_v3_bpr_min(); + } + + return GICv3_IDLE_PRIORITY; +} + +static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 lr_val; + u8 lr_prio, pmr; + int lr, grp; + + grp = __vgic_v3_get_group(vcpu); + + lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val); + if (lr < 0) + goto spurious; + + if (grp != !!(lr_val & ICH_LR_GROUP)) + goto spurious; + + pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + if (pmr <= lr_prio) + goto spurious; + + if (__vgic_v3_get_highest_active_priority() <= __vgic_v3_pri_to_pre(lr_prio, vmcr, grp)) + goto spurious; + + lr_val &= ~ICH_LR_STATE; + /* No active state for LPIs */ + if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI) + lr_val |= ICH_LR_ACTIVE_BIT; + __gic_v3_set_lr(lr_val, lr); + __vgic_v3_set_active_priority(lr_prio, vmcr, grp); + vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); + return; + +spurious: + vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); +} + +static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) +{ + lr_val &= ~ICH_LR_ACTIVE_BIT; + if (lr_val & ICH_LR_HW) { + u32 pid; + + pid = (lr_val & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT; + gic_write_dir(pid); + } + + __gic_v3_set_lr(lr_val, lr); +} + +static void __hyp_text __vgic_v3_bump_eoicount(void) +{ + u32 hcr; + + hcr = read_gicreg(ICH_HCR_EL2); + hcr += 1 << ICH_HCR_EOIcount_SHIFT; + write_gicreg(hcr, ICH_HCR_EL2); +} + +static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 vid = vcpu_get_reg(vcpu, rt); + u64 lr_val; + int lr; + + /* EOImode == 0, nothing to be done here */ + if (!(vmcr & ICH_VMCR_EOIM_MASK)) + return; + + /* No deactivate to be performed on an LPI */ + if (vid >= VGIC_MIN_LPI) + return; + + lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); + if (lr == -1) { + __vgic_v3_bump_eoicount(); + return; + } + + __vgic_v3_clear_active_lr(lr, lr_val); +} + +static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u32 vid = vcpu_get_reg(vcpu, rt); + u64 lr_val; + u8 lr_prio, act_prio; + int lr, grp; + + grp = __vgic_v3_get_group(vcpu); + + /* Drop priority in any case */ + act_prio = __vgic_v3_clear_highest_active_priority(); + + /* If EOIing an LPI, no deactivate to be performed */ + if (vid >= VGIC_MIN_LPI) + return; + + /* EOImode == 1, nothing to be done here 
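A priority drop (__vgic_v3_clear_highest_active_priority() above) amounts to clearing the lowest set bit across the active-priority words, since a lower bit index means a higher priority. A self-contained sketch of that scan, with an array standing in for the ICH_AP{0,1}Rn_EL2 registers (register contents and the rescale factor are made up):

#include <stdint.h>
#include <stdio.h>
#include <strings.h>   /* ffs() */

int main(void)
{
    uint32_t apr[4] = { 0, 0x00000090, 0, 0 };  /* word 1: bits 4 and 7 active */
    unsigned bpr_min = 1;                        /* rescale, see __vgic_v3_bpr_min() */
    unsigned hap = 0;

    for (unsigned i = 0; i < 4; i++) {
        if (!apr[i]) {
            hap += 32;
            continue;
        }
        unsigned bit = ffs(apr[i]) - 1;   /* lowest set bit == highest active prio */
        apr[i] &= ~(1u << bit);           /* drop it */
        printf("dropped active priority %#x\n", (hap + bit) << bpr_min);
        break;
    }
    return 0;
}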
*/ + if (vmcr & ICH_VMCR_EOIM_MASK) + return; + + lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); + if (lr == -1) { + __vgic_v3_bump_eoicount(); + return; + } + + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + + /* If priorities or group do not match, the guest has fscked-up. */ + if (grp != !!(lr_val & ICH_LR_GROUP) || + __vgic_v3_pri_to_pre(lr_prio, vmcr, grp) != act_prio) + return; + + /* Let's now perform the deactivation */ + __vgic_v3_clear_active_lr(lr, lr_val); +} + +static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK)); +} + +static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); +} + +static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + + if (val & 1) + vmcr |= ICH_VMCR_ENG0_MASK; + else + vmcr &= ~ICH_VMCR_ENG0_MASK; + + __vgic_v3_write_vmcr(vmcr); +} + +static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + + if (val & 1) + vmcr |= ICH_VMCR_ENG1_MASK; + else + vmcr &= ~ICH_VMCR_ENG1_MASK; + + __vgic_v3_write_vmcr(vmcr); +} + +static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr)); +} + +static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr)); +} + +static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + u8 bpr_min = __vgic_v3_bpr_min() - 1; + + /* Enforce BPR limiting */ + if (val < bpr_min) + val = bpr_min; + + val <<= ICH_VMCR_BPR0_SHIFT; + val &= ICH_VMCR_BPR0_MASK; + vmcr &= ~ICH_VMCR_BPR0_MASK; + vmcr |= val; + + __vgic_v3_write_vmcr(vmcr); +} + +static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + u8 bpr_min = __vgic_v3_bpr_min(); + + if (vmcr & ICH_VMCR_CBPR_MASK) + return; + + /* Enforce BPR limiting */ + if (val < bpr_min) + val = bpr_min; + + val <<= ICH_VMCR_BPR1_SHIFT; + val &= ICH_VMCR_BPR1_MASK; + vmcr &= ~ICH_VMCR_BPR1_MASK; + vmcr |= val; + + __vgic_v3_write_vmcr(vmcr); +} + +static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +{ + u32 val; + + if (!__vgic_v3_get_group(vcpu)) + val = __vgic_v3_read_ap0rn(n); + else + val = __vgic_v3_read_ap1rn(n); + + vcpu_set_reg(vcpu, rt, val); +} + +static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +{ + u32 val = vcpu_get_reg(vcpu, rt); + + if (!__vgic_v3_get_group(vcpu)) + __vgic_v3_write_ap0rn(val, n); + else + __vgic_v3_write_ap1rn(val, n); +} + +static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 0); +} + +static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 1); +} + +static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 2); +} + +static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 3); +} + +static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 0); +} + 
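The BPR write handlers above follow a clamp-then-insert pattern: the guest value is raised to the architectural minimum, shifted into its VMCR field, and merged back. A compact userspace version of that read-modify-write (the field position below is a stand-in, not the real ICH_VMCR_EL2 layout):

#include <stdint.h>
#include <stdio.h>

#define BPR1_SHIFT 18                  /* stand-in field position */
#define BPR1_MASK  (7u << BPR1_SHIFT)

static uint32_t write_bpr1(uint32_t vmcr, uint32_t val, uint32_t bpr_min)
{
    if (val < bpr_min)                 /* enforce the minimum the HW supports */
        val = bpr_min;

    val <<= BPR1_SHIFT;
    val &= BPR1_MASK;
    vmcr &= ~BPR1_MASK;                /* clear the old field, then merge */
    return vmcr | val;
}

int main(void)
{
    uint32_t vmcr = write_bpr1(0, 0 /* guest asks for 0 */, 2 /* minimum */);

    printf("effective BPR1 = %u\n", (vmcr & BPR1_MASK) >> BPR1_SHIFT);
    return 0;
}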
+static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 1); +} + +static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 2); +} + +static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 3); +} + +static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u64 lr_val; + int lr, lr_grp, grp; + + grp = __vgic_v3_get_group(vcpu); + + lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val); + if (lr == -1) + goto spurious; + + lr_grp = !!(lr_val & ICH_LR_GROUP); + if (lr_grp != grp) + lr_val = ICC_IAR1_EL1_SPURIOUS; + +spurious: + vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); +} + +static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + vmcr &= ICH_VMCR_PMR_MASK; + vmcr >>= ICH_VMCR_PMR_SHIFT; + vcpu_set_reg(vcpu, rt, vmcr); +} + +static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 val = vcpu_get_reg(vcpu, rt); + + val <<= ICH_VMCR_PMR_SHIFT; + val &= ICH_VMCR_PMR_MASK; + vmcr &= ~ICH_VMCR_PMR_MASK; + vmcr |= val; + + write_gicreg(vmcr, ICH_VMCR_EL2); +} + +static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 val = __vgic_v3_get_highest_active_priority(); + vcpu_set_reg(vcpu, rt, val); +} + +static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 vtr, val; + + vtr = read_gicreg(ICH_VTR_EL2); + /* PRIbits */ + val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT; + /* IDbits */ + val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; + /* SEIS */ + val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT; + /* A3V */ + val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; + /* EOImode */ + val |= ((vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT) << ICC_CTLR_EL1_EOImode_SHIFT; + /* CBPR */ + val |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; + + vcpu_set_reg(vcpu, rt, val); +} + +static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 val = vcpu_get_reg(vcpu, rt); + + if (val & ICC_CTLR_EL1_CBPR_MASK) + vmcr |= ICH_VMCR_CBPR_MASK; + else + vmcr &= ~ICH_VMCR_CBPR_MASK; + + if (val & ICC_CTLR_EL1_EOImode_MASK) + vmcr |= ICH_VMCR_EOIM_MASK; + else + vmcr &= ~ICH_VMCR_EOIM_MASK; + + write_gicreg(vmcr, ICH_VMCR_EL2); +} + +int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) +{ + int rt; + u32 esr; + u32 vmcr; + void (*fn)(struct kvm_vcpu *, u32, int); + bool is_read; + u32 sysreg; + + esr = kvm_vcpu_get_hsr(vcpu); + if (vcpu_mode_is_32bit(vcpu)) { + if (!kvm_condition_valid(vcpu)) { + __kvm_skip_instr(vcpu); + return 1; + } + + sysreg = esr_cp15_to_sysreg(esr); + } else { + sysreg = esr_sys64_to_sysreg(esr); + } + + is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; + + switch (sysreg) { + case SYS_ICC_IAR0_EL1: + case SYS_ICC_IAR1_EL1: + if (unlikely(!is_read)) + return 0; + fn = __vgic_v3_read_iar; + break; + case SYS_ICC_EOIR0_EL1: + case SYS_ICC_EOIR1_EL1: + if (unlikely(is_read)) + return 0; + fn = __vgic_v3_write_eoir; + break; + case SYS_ICC_IGRPEN1_EL1: + if (is_read) + fn = __vgic_v3_read_igrpen1; + else + fn = __vgic_v3_write_igrpen1; + break; + case SYS_ICC_BPR1_EL1: + if (is_read) + fn = __vgic_v3_read_bpr1; + else + fn = __vgic_v3_write_bpr1; + break; + case SYS_ICC_AP0Rn_EL1(0): + case 
SYS_ICC_AP1Rn_EL1(0): + if (is_read) + fn = __vgic_v3_read_apxr0; + else + fn = __vgic_v3_write_apxr0; + break; + case SYS_ICC_AP0Rn_EL1(1): + case SYS_ICC_AP1Rn_EL1(1): + if (is_read) + fn = __vgic_v3_read_apxr1; + else + fn = __vgic_v3_write_apxr1; + break; + case SYS_ICC_AP0Rn_EL1(2): + case SYS_ICC_AP1Rn_EL1(2): + if (is_read) + fn = __vgic_v3_read_apxr2; + else + fn = __vgic_v3_write_apxr2; + break; + case SYS_ICC_AP0Rn_EL1(3): + case SYS_ICC_AP1Rn_EL1(3): + if (is_read) + fn = __vgic_v3_read_apxr3; + else + fn = __vgic_v3_write_apxr3; + break; + case SYS_ICC_HPPIR0_EL1: + case SYS_ICC_HPPIR1_EL1: + if (unlikely(!is_read)) + return 0; + fn = __vgic_v3_read_hppir; + break; + case SYS_ICC_IGRPEN0_EL1: + if (is_read) + fn = __vgic_v3_read_igrpen0; + else + fn = __vgic_v3_write_igrpen0; + break; + case SYS_ICC_BPR0_EL1: + if (is_read) + fn = __vgic_v3_read_bpr0; + else + fn = __vgic_v3_write_bpr0; + break; + case SYS_ICC_DIR_EL1: + if (unlikely(is_read)) + return 0; + fn = __vgic_v3_write_dir; + break; + case SYS_ICC_RPR_EL1: + if (unlikely(!is_read)) + return 0; + fn = __vgic_v3_read_rpr; + break; + case SYS_ICC_CTLR_EL1: + if (is_read) + fn = __vgic_v3_read_ctlr; + else + fn = __vgic_v3_write_ctlr; + break; + case SYS_ICC_PMR_EL1: + if (is_read) + fn = __vgic_v3_read_pmr; + else + fn = __vgic_v3_write_pmr; + break; + default: + return 0; + } + + vmcr = __vgic_v3_read_vmcr(); + rt = kvm_vcpu_sys_get_rt(vcpu); + fn(vcpu, vmcr, rt); + + __kvm_skip_instr(vcpu); + + return 1; +} diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c new file mode 100644 index 000000000000..550dfa3e53cd --- /dev/null +++ b/arch/arm64/kvm/hypercalls.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Arm Ltd. + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> + +#include <kvm/arm_hypercalls.h> +#include <kvm/arm_psci.h> + +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) +{ + u32 func_id = smccc_get_function(vcpu); + long val = SMCCC_RET_NOT_SUPPORTED; + u32 feature; + gpa_t gpa; + + switch (func_id) { + case ARM_SMCCC_VERSION_FUNC_ID: + val = ARM_SMCCC_VERSION_1_1; + break; + case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: + feature = smccc_get_arg1(vcpu); + switch (feature) { + case ARM_SMCCC_ARCH_WORKAROUND_1: + switch (kvm_arm_harden_branch_predictor()) { + case KVM_BP_HARDEN_UNKNOWN: + break; + case KVM_BP_HARDEN_WA_NEEDED: + val = SMCCC_RET_SUCCESS; + break; + case KVM_BP_HARDEN_NOT_REQUIRED: + val = SMCCC_RET_NOT_REQUIRED; + break; + } + break; + case ARM_SMCCC_ARCH_WORKAROUND_2: + switch (kvm_arm_have_ssbd()) { + case KVM_SSBD_FORCE_DISABLE: + case KVM_SSBD_UNKNOWN: + break; + case KVM_SSBD_KERNEL: + val = SMCCC_RET_SUCCESS; + break; + case KVM_SSBD_FORCE_ENABLE: + case KVM_SSBD_MITIGATED: + val = SMCCC_RET_NOT_REQUIRED; + break; + } + break; + case ARM_SMCCC_HV_PV_TIME_FEATURES: + val = SMCCC_RET_SUCCESS; + break; + } + break; + case ARM_SMCCC_HV_PV_TIME_FEATURES: + val = kvm_hypercall_pv_features(vcpu); + break; + case ARM_SMCCC_HV_PV_TIME_ST: + gpa = kvm_init_stolen_time(vcpu); + if (gpa != GPA_INVALID) + val = gpa; + break; + default: + return kvm_psci_call(vcpu); + } + + smccc_set_retval(vcpu, val, 0, 0, 0); + return 1; +} diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 6aafc2825c1c..e21fdd93027a 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -26,28 +26,12 @@ enum exception_type { except_type_serror = 0x180, }; -static u64 get_except_vector(struct kvm_vcpu 
*vcpu, enum exception_type type) -{ - u64 exc_offset; - - switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) { - case PSR_MODE_EL1t: - exc_offset = CURRENT_EL_SP_EL0_VECTOR; - break; - case PSR_MODE_EL1h: - exc_offset = CURRENT_EL_SP_ELx_VECTOR; - break; - case PSR_MODE_EL0t: - exc_offset = LOWER_EL_AArch64_VECTOR; - break; - default: - exc_offset = LOWER_EL_AArch32_VECTOR; - } - - return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; -} - /* + * This performs the exception entry at a given EL (@target_mode), stashing PC + * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. + * The EL passed to this function *must* be a non-secure, privileged mode with + * bit 0 being set (PSTATE.SP == 1). + * * When an exception is taken, most PSTATE fields are left unchanged in the * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx @@ -59,10 +43,35 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from * MSB to LSB. */ -static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) +static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, + enum exception_type type) { - unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); - unsigned long old, new; + unsigned long sctlr, vbar, old, new, mode; + u64 exc_offset; + + mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); + + if (mode == target_mode) + exc_offset = CURRENT_EL_SP_ELx_VECTOR; + else if ((mode | PSR_MODE_THREAD_BIT) == target_mode) + exc_offset = CURRENT_EL_SP_EL0_VECTOR; + else if (!(mode & PSR_MODE32_BIT)) + exc_offset = LOWER_EL_AArch64_VECTOR; + else + exc_offset = LOWER_EL_AArch32_VECTOR; + + switch (target_mode) { + case PSR_MODE_EL1h: + vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1); + sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); + break; + default: + /* Don't do that */ + BUG(); + } + + *vcpu_pc(vcpu) = vbar + exc_offset + type; old = *vcpu_cpsr(vcpu); new = 0; @@ -105,9 +114,10 @@ static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) new |= PSR_I_BIT; new |= PSR_F_BIT; - new |= PSR_MODE_EL1h; + new |= target_mode; - return new; + *vcpu_cpsr(vcpu) = new; + vcpu_write_spsr(vcpu, old); } static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) @@ -116,11 +126,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u32 esr = 0; - vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - - *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); - vcpu_write_spsr(vcpu, cpsr); + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -148,14 +154,9 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr static void inject_undef64(struct kvm_vcpu *vcpu) { - unsigned long cpsr = *vcpu_cpsr(vcpu); u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - - *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); - vcpu_write_spsr(vcpu, cpsr); + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); /* * Build an unknown exception, depending on the instruction diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c new 
file mode 100644 index 000000000000..4e0366759726 --- /dev/null +++ b/arch/arm64/kvm/mmio.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <trace/events/kvm.h> + +#include "trace.h" + +void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data) +{ + void *datap = NULL; + union { + u8 byte; + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + tmp.byte = data; + datap = &tmp.byte; + break; + case 2: + tmp.hword = data; + datap = &tmp.hword; + break; + case 4: + tmp.word = data; + datap = &tmp.word; + break; + case 8: + tmp.dword = data; + datap = &tmp.dword; + break; + } + + memcpy(buf, datap, len); +} + +unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len) +{ + unsigned long data = 0; + union { + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + data = *(u8 *)buf; + break; + case 2: + memcpy(&tmp.hword, buf, len); + data = tmp.hword; + break; + case 4: + memcpy(&tmp.word, buf, len); + data = tmp.word; + break; + case 8: + memcpy(&tmp.dword, buf, len); + data = tmp.dword; + break; + } + + return data; +} + +/** + * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation + * or in-kernel IO emulation + * + * @vcpu: The VCPU pointer + * @run: The VCPU run struct containing the mmio data + */ +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + unsigned long data; + unsigned int len; + int mask; + + /* Detect an already handled MMIO return */ + if (unlikely(!vcpu->mmio_needed)) + return 0; + + vcpu->mmio_needed = 0; + + if (!kvm_vcpu_dabt_iswrite(vcpu)) { + len = kvm_vcpu_dabt_get_as(vcpu); + data = kvm_mmio_read_buf(run->mmio.data, len); + + if (kvm_vcpu_dabt_issext(vcpu) && + len < sizeof(unsigned long)) { + mask = 1U << ((len * 8) - 1); + data = (data ^ mask) - mask; + } + + if (!kvm_vcpu_dabt_issf(vcpu)) + data = data & 0xffffffff; + + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, + &data); + data = vcpu_data_host_to_guest(vcpu, data, len); + vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); + } + + /* + * The MMIO instruction is emulated and should not be re-executed + * in the guest. + */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + + return 0; +} + +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa) +{ + unsigned long data; + unsigned long rt; + int ret; + bool is_write; + int len; + u8 data_buf[8]; + + /* + * No valid syndrome? Ask userspace for help if it has + * volunteered to do so, and bail out otherwise. + */ + if (!kvm_vcpu_dabt_isvalid(vcpu)) { + if (vcpu->kvm->arch.return_nisv_io_abort_to_user) { + run->exit_reason = KVM_EXIT_ARM_NISV; + run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu); + run->arm_nisv.fault_ipa = fault_ipa; + return 0; + } + + kvm_pr_unimpl("Data abort outside memslots with no valid syndrome info\n"); + return -ENOSYS; + } + + /* Page table accesses IO mem: tell guest to fix its TTBR */ + if (kvm_vcpu_dabt_iss1tw(vcpu)) { + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 1; + } + + /* + * Prepare MMIO operation. First decode the syndrome data we get + * from the CPU. Then try if some in-kernel emulation feels + * responsible, otherwise let user space do its magic. 
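kvm_handle_mmio_return() above sign-extends a narrow load with the classic xor/subtract trick before writing it back to the guest register. A standalone demonstration of that exact expression (the input value is arbitrary test data):

#include <stdint.h>
#include <stdio.h>

/* Sign-extend the low 'len' bytes of 'data', as the MMIO return path does. */
static unsigned long sign_extend(unsigned long data, unsigned int len)
{
    unsigned long mask = 1UL << ((len * 8) - 1);
    return (data ^ mask) - mask;
}

int main(void)
{
    unsigned long byte = 0xf0;   /* an 8-bit load of -16 */

    printf("raw %#lx -> sign-extended %ld\n", byte, (long)sign_extend(byte, 1));
    return 0;
}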
+ */ + is_write = kvm_vcpu_dabt_iswrite(vcpu); + len = kvm_vcpu_dabt_get_as(vcpu); + rt = kvm_vcpu_dabt_get_rd(vcpu); + + if (is_write) { + data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), + len); + + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data); + kvm_mmio_write_buf(data_buf, len, data); + + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); + } else { + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, + fault_ipa, NULL); + + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); + } + + /* Now prepare kvm_run for the potential return to userland. */ + run->mmio.is_write = is_write; + run->mmio.phys_addr = fault_ipa; + run->mmio.len = len; + vcpu->mmio_needed = 1; + + if (!ret) { + /* We handled the access successfully in the kernel. */ + if (!is_write) + memcpy(run->mmio.data, data_buf, len); + vcpu->stat.mmio_exit_kernel++; + kvm_handle_mmio_return(vcpu, run); + return 1; + } + + if (is_write) + memcpy(run->mmio.data, data_buf, len); + vcpu->stat.mmio_exit_user++; + run->exit_reason = KVM_EXIT_MMIO; + return 0; +} diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c new file mode 100644 index 000000000000..a1f6bc70c4e4 --- /dev/null +++ b/arch/arm64/kvm/mmu.c @@ -0,0 +1,2467 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <linux/mman.h> +#include <linux/kvm_host.h> +#include <linux/io.h> +#include <linux/hugetlb.h> +#include <linux/sched/signal.h> +#include <trace/events/kvm.h> +#include <asm/pgalloc.h> +#include <asm/cacheflush.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_ras.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/virt.h> + +#include "trace.h" + +static pgd_t *boot_hyp_pgd; +static pgd_t *hyp_pgd; +static pgd_t *merged_hyp_pgd; +static DEFINE_MUTEX(kvm_hyp_pgd_mutex); + +static unsigned long hyp_idmap_start; +static unsigned long hyp_idmap_end; +static phys_addr_t hyp_idmap_vector; + +static unsigned long io_map_base; + +#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) + +#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) +#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1) + +static bool is_iomap(unsigned long flags) +{ + return flags & KVM_S2PTE_FLAG_IS_IOMAP; +} + +static bool memslot_is_logging(struct kvm_memory_slot *memslot) +{ + return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY); +} + +/** + * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8 + * @kvm: pointer to kvm structure. + * + * Interface to HYP function to flush all VM TLB entries + */ +void kvm_flush_remote_tlbs(struct kvm *kvm) +{ + kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); +} + +static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +{ + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); +} + +/* + * D-Cache management functions. They take the page table entries by + * value, as they are flushing the cache using the kernel mapping (or + * kmap on 32bit). + */ +static void kvm_flush_dcache_pte(pte_t pte) +{ + __kvm_flush_dcache_pte(pte); +} + +static void kvm_flush_dcache_pmd(pmd_t pmd) +{ + __kvm_flush_dcache_pmd(pmd); +} + +static void kvm_flush_dcache_pud(pud_t pud) +{ + __kvm_flush_dcache_pud(pud); +} + +static bool kvm_is_device_pfn(unsigned long pfn) +{ + return !pfn_valid(pfn); +} + +/** + * stage2_dissolve_pmd() - clear and flush huge PMD entry + * @kvm: pointer to kvm structure. 
+ * @addr: IPA + * @pmd: pmd pointer for IPA + * + * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. + */ +static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) +{ + if (!pmd_thp_or_huge(*pmd)) + return; + + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pmd)); +} + +/** + * stage2_dissolve_pud() - clear and flush huge PUD entry + * @kvm: pointer to kvm structure. + * @addr: IPA + * @pud: pud pointer for IPA + * + * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. + */ +static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp) +{ + if (!stage2_pud_huge(kvm, *pudp)) + return; + + stage2_pud_clear(kvm, pudp); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pudp)); +} + +static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, + int min, int max) +{ + void *page; + + BUG_ON(max > KVM_NR_MEM_OBJS); + if (cache->nobjs >= min) + return 0; + while (cache->nobjs < max) { + page = (void *)__get_free_page(GFP_PGTABLE_USER); + if (!page) + return -ENOMEM; + cache->objects[cache->nobjs++] = page; + } + return 0; +} + +static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) +{ + while (mc->nobjs) + free_page((unsigned long)mc->objects[--mc->nobjs]); +} + +static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) +{ + void *p; + + BUG_ON(!mc || !mc->nobjs); + p = mc->objects[--mc->nobjs]; + return p; +} + +static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) +{ + pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, pgd, 0UL); + stage2_pgd_clear(kvm, pgd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + stage2_pud_free(kvm, pud_table); + put_page(virt_to_page(pgd)); +} + +static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) +{ + pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0); + VM_BUG_ON(stage2_pud_huge(kvm, *pud)); + stage2_pud_clear(kvm, pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + stage2_pmd_free(kvm, pmd_table); + put_page(virt_to_page(pud)); +} + +static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) +{ + pte_t *pte_table = pte_offset_kernel(pmd, 0); + VM_BUG_ON(pmd_thp_or_huge(*pmd)); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + free_page((unsigned long)pte_table); + put_page(virt_to_page(pmd)); +} + +static inline void kvm_set_pte(pte_t *ptep, pte_t new_pte) +{ + WRITE_ONCE(*ptep, new_pte); + dsb(ishst); +} + +static inline void kvm_set_pmd(pmd_t *pmdp, pmd_t new_pmd) +{ + WRITE_ONCE(*pmdp, new_pmd); + dsb(ishst); +} + +static inline void kvm_pmd_populate(pmd_t *pmdp, pte_t *ptep) +{ + kvm_set_pmd(pmdp, kvm_mk_pmd(ptep)); +} + +static inline void kvm_pud_populate(pud_t *pudp, pmd_t *pmdp) +{ + WRITE_ONCE(*pudp, kvm_mk_pud(pmdp)); + dsb(ishst); +} + +static inline void kvm_pgd_populate(pgd_t *pgdp, pud_t *pudp) +{ + WRITE_ONCE(*pgdp, kvm_mk_pgd(pudp)); + dsb(ishst); +} + +/* + * Unmapping vs dcache management: + * + * If a guest maps certain memory pages as uncached, all writes will + * bypass the data cache and go directly to RAM. However, the CPUs + * can still speculate reads (not writes) and fill cache lines with + * data. + * + * Those cache lines will be *clean* cache lines though, so a + * clean+invalidate operation is equivalent to an invalidate + * operation, because no cache lines are marked dirty. 
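mmu_topup_memory_cache() above pre-fills a small array of pages while it is still legal to sleep, so that later table population under mmu_lock can pop objects without allocating. A userspace sketch of that topup/alloc pair, with malloc() standing in for __get_free_page() (sizes and names are illustrative only):

#include <stdlib.h>
#include <stdio.h>

#define NR_OBJS 8

struct obj_cache {
    int nobjs;
    void *objects[NR_OBJS];
};

/* Fill the cache up to 'max' entries; may "sleep" (allocate). */
static int cache_topup(struct obj_cache *c, int min, int max)
{
    if (c->nobjs >= min)
        return 0;
    while (c->nobjs < max) {
        void *p = malloc(4096);
        if (!p)
            return -1;
        c->objects[c->nobjs++] = p;
    }
    return 0;
}

/* Pop one pre-allocated object; never allocates, safe "under the lock". */
static void *cache_alloc(struct obj_cache *c)
{
    return c->objects[--c->nobjs];
}

int main(void)
{
    struct obj_cache c = { 0 };

    if (cache_topup(&c, 2, 4))
        return 1;
    printf("topped up to %d objects, first pop: %p\n", c.nobjs, cache_alloc(&c));
    return 0;
}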
+ * + * Those clean cache lines could be filled prior to an uncached write + * by the guest, and the cache coherent IO subsystem would therefore + * end up writing old data to disk. + * + * This is why right after unmapping a page/section and invalidating + * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure + * the IO subsystem will never hit in the cache. + * + * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as + * we then fully enforce cacheability of RAM, no matter what the guest + * does. + */ +static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t start_addr = addr; + pte_t *pte, *start_pte; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + pte_t old_pte = *pte; + + kvm_set_pte(pte, __pte(0)); + kvm_tlb_flush_vmid_ipa(kvm, addr); + + /* No need to invalidate the cache for device mappings */ + if (!kvm_is_device_pfn(pte_pfn(old_pte))) + kvm_flush_dcache_pte(old_pte); + + put_page(virt_to_page(pte)); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (stage2_pte_table_empty(kvm, start_pte)) + clear_stage2_pmd_entry(kvm, pmd, start_addr); +} + +static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next, start_addr = addr; + pmd_t *pmd, *start_pmd; + + start_pmd = pmd = stage2_pmd_offset(kvm, pud, addr); + do { + next = stage2_pmd_addr_end(kvm, addr, end); + if (!pmd_none(*pmd)) { + if (pmd_thp_or_huge(*pmd)) { + pmd_t old_pmd = *pmd; + + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + + kvm_flush_dcache_pmd(old_pmd); + + put_page(virt_to_page(pmd)); + } else { + unmap_stage2_ptes(kvm, pmd, addr, next); + } + } + } while (pmd++, addr = next, addr != end); + + if (stage2_pmd_table_empty(kvm, start_pmd)) + clear_stage2_pud_entry(kvm, pud, start_addr); +} + +static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next, start_addr = addr; + pud_t *pud, *start_pud; + + start_pud = pud = stage2_pud_offset(kvm, pgd, addr); + do { + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { + if (stage2_pud_huge(kvm, *pud)) { + pud_t old_pud = *pud; + + stage2_pud_clear(kvm, pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_flush_dcache_pud(old_pud); + put_page(virt_to_page(pud)); + } else { + unmap_stage2_pmds(kvm, pud, addr, next); + } + } + } while (pud++, addr = next, addr != end); + + if (stage2_pud_table_empty(kvm, start_pud)) + clear_stage2_pgd_entry(kvm, pgd, start_addr); +} + +/** + * unmap_stage2_range -- Clear stage2 page table entries to unmap a range + * @kvm: The VM pointer + * @start: The intermediate physical base address of the range to unmap + * @size: The size of the area to unmap + * + * Clear a range of stage-2 mappings, lowering the various ref-counts. Must + * be called while holding mmu_lock (unless for freeing the stage2 pgd before + * destroying the VM), otherwise another faulting VCPU may come in and mess + * with things behind our backs. + */ +static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) +{ + pgd_t *pgd; + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + + assert_spin_locked(&kvm->mmu_lock); + WARN_ON(size & ~PAGE_MASK); + + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + do { + /* + * Make sure the page table is still active, as another thread + * could have possibly freed the page table, while we released + * the lock. 
+ */ + if (!READ_ONCE(kvm->arch.pgd)) + break; + next = stage2_pgd_addr_end(kvm, addr, end); + if (!stage2_pgd_none(kvm, *pgd)) + unmap_stage2_puds(kvm, pgd, addr, next); + /* + * If the range is too large, release the kvm->mmu_lock + * to prevent starvation and lockup detector warnings. + */ + if (next != end) + cond_resched_lock(&kvm->mmu_lock); + } while (pgd++, addr = next, addr != end); +} + +static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte))) + kvm_flush_dcache_pte(*pte); + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) +{ + pmd_t *pmd; + phys_addr_t next; + + pmd = stage2_pmd_offset(kvm, pud, addr); + do { + next = stage2_pmd_addr_end(kvm, addr, end); + if (!pmd_none(*pmd)) { + if (pmd_thp_or_huge(*pmd)) + kvm_flush_dcache_pmd(*pmd); + else + stage2_flush_ptes(kvm, pmd, addr, next); + } + } while (pmd++, addr = next, addr != end); +} + +static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + pud_t *pud; + phys_addr_t next; + + pud = stage2_pud_offset(kvm, pgd, addr); + do { + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { + if (stage2_pud_huge(kvm, *pud)) + kvm_flush_dcache_pud(*pud); + else + stage2_flush_pmds(kvm, pud, addr, next); + } + } while (pud++, addr = next, addr != end); +} + +static void stage2_flush_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t end = addr + PAGE_SIZE * memslot->npages; + phys_addr_t next; + pgd_t *pgd; + + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + do { + next = stage2_pgd_addr_end(kvm, addr, end); + if (!stage2_pgd_none(kvm, *pgd)) + stage2_flush_puds(kvm, pgd, addr, next); + + if (next != end) + cond_resched_lock(&kvm->mmu_lock); + } while (pgd++, addr = next, addr != end); +} + +/** + * stage2_flush_vm - Invalidate cache for pages mapped in stage 2 + * @kvm: The struct kvm pointer + * + * Go through the stage 2 page tables and invalidate any cache lines + * backing memory already mapped to the VM. 
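All of the stage-2 walkers above share the same loop shape: compute how far the current table entry's coverage extends (clamped to the end of the range), act on that sub-range, then advance. A minimal userspace rendering of that idiom over 2 MiB blocks (the block size is chosen purely for the demo):

#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE (2ULL << 20)   /* pretend each table entry covers 2 MiB */

/* Next block boundary, clamped to 'end' -- same role as stage2_*_addr_end(). */
static uint64_t block_addr_end(uint64_t addr, uint64_t end)
{
    uint64_t next = (addr + BLOCK_SIZE) & ~(BLOCK_SIZE - 1);
    return next < end ? next : end;
}

int main(void)
{
    uint64_t addr = 0x1f0000, end = 0x610000, next;

    do {
        next = block_addr_end(addr, end);
        printf("handle [%#llx, %#llx)\n",
               (unsigned long long)addr, (unsigned long long)next);
    } while (addr = next, addr != end);
    return 0;
}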
+ */ +static void stage2_flush_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_flush_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + +static void clear_hyp_pgd_entry(pgd_t *pgd) +{ + pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL); + pgd_clear(pgd); + pud_free(NULL, pud_table); + put_page(virt_to_page(pgd)); +} + +static void clear_hyp_pud_entry(pud_t *pud) +{ + pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0); + VM_BUG_ON(pud_huge(*pud)); + pud_clear(pud); + pmd_free(NULL, pmd_table); + put_page(virt_to_page(pud)); +} + +static void clear_hyp_pmd_entry(pmd_t *pmd) +{ + pte_t *pte_table = pte_offset_kernel(pmd, 0); + VM_BUG_ON(pmd_thp_or_huge(*pmd)); + pmd_clear(pmd); + pte_free_kernel(NULL, pte_table); + put_page(virt_to_page(pmd)); +} + +static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte, *start_pte; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (hyp_pte_table_empty(start_pte)) + clear_hyp_pmd_entry(pmd); +} + +static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next; + pmd_t *pmd, *start_pmd; + + start_pmd = pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + /* Hyp doesn't use huge pmds */ + if (!pmd_none(*pmd)) + unmap_hyp_ptes(pmd, addr, next); + } while (pmd++, addr = next, addr != end); + + if (hyp_pmd_table_empty(start_pmd)) + clear_hyp_pud_entry(pud); +} + +static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next; + pud_t *pud, *start_pud; + + start_pud = pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + /* Hyp doesn't use huge puds */ + if (!pud_none(*pud)) + unmap_hyp_pmds(pud, addr, next); + } while (pud++, addr = next, addr != end); + + if (hyp_pud_table_empty(start_pud)) + clear_hyp_pgd_entry(pgd); +} + +static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd) +{ + return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1); +} + +static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd, + phys_addr_t start, u64 size) +{ + pgd_t *pgd; + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + + /* + * We don't unmap anything from HYP, except at the hyp tear down. + * Hence, we don't have to invalidate the TLBs here. + */ + pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd); + do { + next = pgd_addr_end(addr, end); + if (!pgd_none(*pgd)) + unmap_hyp_puds(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + __unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size); +} + +static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + __unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size); +} + +/** + * free_hyp_pgds - free Hyp-mode page tables + * + * Assumes hyp_pgd is a page table used strictly in Hyp-mode and + * therefore contains either mappings in the kernel memory area (above + * PAGE_OFFSET), or device mappings in the idmap range. + * + * boot_hyp_pgd should only map the idmap range, and is only used in + * the extended idmap case. 
+ */ +void free_hyp_pgds(void) +{ + pgd_t *id_pgd; + + mutex_lock(&kvm_hyp_pgd_mutex); + + id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd; + + if (id_pgd) { + /* In case we never called hyp_mmu_init() */ + if (!io_map_base) + io_map_base = hyp_idmap_start; + unmap_hyp_idmap_range(id_pgd, io_map_base, + hyp_idmap_start + PAGE_SIZE - io_map_base); + } + + if (boot_hyp_pgd) { + free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); + boot_hyp_pgd = NULL; + } + + if (hyp_pgd) { + unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET), + (uintptr_t)high_memory - PAGE_OFFSET); + + free_pages((unsigned long)hyp_pgd, hyp_pgd_order); + hyp_pgd = NULL; + } + if (merged_hyp_pgd) { + clear_page(merged_hyp_pgd); + free_page((unsigned long)merged_hyp_pgd); + merged_hyp_pgd = NULL; + } + + mutex_unlock(&kvm_hyp_pgd_mutex); +} + +static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) +{ + pte_t *pte; + unsigned long addr; + + addr = start; + do { + pte = pte_offset_kernel(pmd, addr); + kvm_set_pte(pte, kvm_pfn_pte(pfn, prot)); + get_page(virt_to_page(pte)); + pfn++; + } while (addr += PAGE_SIZE, addr != end); +} + +static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) +{ + pmd_t *pmd; + pte_t *pte; + unsigned long addr, next; + + addr = start; + do { + pmd = pmd_offset(pud, addr); + + BUG_ON(pmd_sect(*pmd)); + + if (pmd_none(*pmd)) { + pte = pte_alloc_one_kernel(NULL); + if (!pte) { + kvm_err("Cannot allocate Hyp pte\n"); + return -ENOMEM; + } + kvm_pmd_populate(pmd, pte); + get_page(virt_to_page(pmd)); + } + + next = pmd_addr_end(addr, end); + + create_hyp_pte_mappings(pmd, addr, next, pfn, prot); + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); + + return 0; +} + +static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) +{ + pud_t *pud; + pmd_t *pmd; + unsigned long addr, next; + int ret; + + addr = start; + do { + pud = pud_offset(pgd, addr); + + if (pud_none_or_clear_bad(pud)) { + pmd = pmd_alloc_one(NULL, addr); + if (!pmd) { + kvm_err("Cannot allocate Hyp pmd\n"); + return -ENOMEM; + } + kvm_pud_populate(pud, pmd); + get_page(virt_to_page(pud)); + } + + next = pud_addr_end(addr, end); + ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot); + if (ret) + return ret; + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); + + return 0; +} + +static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd, + unsigned long start, unsigned long end, + unsigned long pfn, pgprot_t prot) +{ + pgd_t *pgd; + pud_t *pud; + unsigned long addr, next; + int err = 0; + + mutex_lock(&kvm_hyp_pgd_mutex); + addr = start & PAGE_MASK; + end = PAGE_ALIGN(end); + do { + pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd); + + if (pgd_none(*pgd)) { + pud = pud_alloc_one(NULL, addr); + if (!pud) { + kvm_err("Cannot allocate Hyp pud\n"); + err = -ENOMEM; + goto out; + } + kvm_pgd_populate(pgd, pud); + get_page(virt_to_page(pgd)); + } + + next = pgd_addr_end(addr, end); + err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot); + if (err) + goto out; + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); +out: + mutex_unlock(&kvm_hyp_pgd_mutex); + return err; +} + +static phys_addr_t kvm_kaddr_to_phys(void *kaddr) +{ + if (!is_vmalloc_addr(kaddr)) { + BUG_ON(!virt_addr_valid(kaddr)); + return __pa(kaddr); + } else { + return 
page_to_phys(vmalloc_to_page(kaddr)) + + offset_in_page(kaddr); + } +} + +/** + * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode + * @from: The virtual kernel start address of the range + * @to: The virtual kernel end address of the range (exclusive) + * @prot: The protection to be applied to this range + * + * The same virtual address as the kernel virtual address is also used + * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying + * physical pages. + */ +int create_hyp_mappings(void *from, void *to, pgprot_t prot) +{ + phys_addr_t phys_addr; + unsigned long virt_addr; + unsigned long start = kern_hyp_va((unsigned long)from); + unsigned long end = kern_hyp_va((unsigned long)to); + + if (is_kernel_in_hyp_mode()) + return 0; + + start = start & PAGE_MASK; + end = PAGE_ALIGN(end); + + for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) { + int err; + + phys_addr = kvm_kaddr_to_phys(from + virt_addr - start); + err = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, + virt_addr, virt_addr + PAGE_SIZE, + __phys_to_pfn(phys_addr), + prot); + if (err) + return err; + } + + return 0; +} + +static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, + unsigned long *haddr, pgprot_t prot) +{ + pgd_t *pgd = hyp_pgd; + unsigned long base; + int ret = 0; + + mutex_lock(&kvm_hyp_pgd_mutex); + + /* + * This assumes that we have enough space below the idmap + * page to allocate our VAs. If not, the check below will + * kick. A potential alternative would be to detect that + * overflow and switch to an allocation above the idmap. + * + * The allocated size is always a multiple of PAGE_SIZE. + */ + size = PAGE_ALIGN(size + offset_in_page(phys_addr)); + base = io_map_base - size; + + /* + * Verify that BIT(VA_BITS - 1) hasn't been flipped by + * allocating the new area, as it would indicate we've + * overflowed the idmap/IO address range. 
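__create_hyp_private_mapping() above hands out hypervisor VAs by walking io_map_base downwards and treats a flip of bit VA_BITS-1 as running out of room. A small userspace model of that allocator (the demo range width and starting base are invented; the real code uses VA_BITS and the idmap location):

#include <stdint.h>
#include <stdio.h>

#define VA_BITS_DEMO 20                  /* tiny demo range */
#define PAGE_SZ      4096ULL

static uint64_t io_map_base = 0xF0000;   /* somewhere in the upper half of the range */

/* Allocate 'size' bytes growing downwards; returns 0 if we'd leave the range. */
static uint64_t alloc_private_va(uint64_t size)
{
    uint64_t base;

    size = (size + PAGE_SZ - 1) & ~(PAGE_SZ - 1);   /* PAGE_ALIGN() equivalent */
    base = io_map_base - size;

    /* A flip of bit VA_BITS-1 means we crossed below the usable window. */
    if ((base ^ io_map_base) & (1ULL << (VA_BITS_DEMO - 1)))
        return 0;

    io_map_base = base;
    return base;
}

int main(void)
{
    printf("small alloc: %#llx\n", (unsigned long long)alloc_private_va(3 * PAGE_SZ));
    printf("oversized:   %#llx (0 = out of range)\n",
           (unsigned long long)alloc_private_va(0x70000));
    return 0;
}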
+ */ + if ((base ^ io_map_base) & BIT(VA_BITS - 1)) + ret = -ENOMEM; + else + io_map_base = base; + + mutex_unlock(&kvm_hyp_pgd_mutex); + + if (ret) + goto out; + + if (__kvm_cpu_uses_extended_idmap()) + pgd = boot_hyp_pgd; + + ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(), + base, base + size, + __phys_to_pfn(phys_addr), prot); + if (ret) + goto out; + + *haddr = base + offset_in_page(phys_addr); + +out: + return ret; +} + +/** + * create_hyp_io_mappings - Map IO into both kernel and HYP + * @phys_addr: The physical start address which gets mapped + * @size: Size of the region being mapped + * @kaddr: Kernel VA for this mapping + * @haddr: HYP VA for this mapping + */ +int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, + void __iomem **kaddr, + void __iomem **haddr) +{ + unsigned long addr; + int ret; + + *kaddr = ioremap(phys_addr, size); + if (!*kaddr) + return -ENOMEM; + + if (is_kernel_in_hyp_mode()) { + *haddr = *kaddr; + return 0; + } + + ret = __create_hyp_private_mapping(phys_addr, size, + &addr, PAGE_HYP_DEVICE); + if (ret) { + iounmap(*kaddr); + *kaddr = NULL; + *haddr = NULL; + return ret; + } + + *haddr = (void __iomem *)addr; + return 0; +} + +/** + * create_hyp_exec_mappings - Map an executable range into HYP + * @phys_addr: The physical start address which gets mapped + * @size: Size of the region being mapped + * @haddr: HYP VA for this mapping + */ +int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, + void **haddr) +{ + unsigned long addr; + int ret; + + BUG_ON(is_kernel_in_hyp_mode()); + + ret = __create_hyp_private_mapping(phys_addr, size, + &addr, PAGE_HYP_EXEC); + if (ret) { + *haddr = NULL; + return ret; + } + + *haddr = (void *)addr; + return 0; +} + +/** + * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. + * @kvm: The KVM struct pointer for the VM. + * + * Allocates only the stage-2 HW PGD level table(s) of size defined by + * stage2_pgd_size(kvm). + * + * Note we don't need locking here as this is only called when the VM is + * created, which can only be done once. + */ +int kvm_alloc_stage2_pgd(struct kvm *kvm) +{ + phys_addr_t pgd_phys; + pgd_t *pgd; + + if (kvm->arch.pgd != NULL) { + kvm_err("kvm_arch already initialized?\n"); + return -EINVAL; + } + + /* Allocate the HW PGD, making sure that each page gets its own refcount */ + pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO); + if (!pgd) + return -ENOMEM; + + pgd_phys = virt_to_phys(pgd); + if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm))) + return -EINVAL; + + kvm->arch.pgd = pgd; + kvm->arch.pgd_phys = pgd_phys; + return 0; +} + +static void stage2_unmap_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + hva_t hva = memslot->userspace_addr; + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t size = PAGE_SIZE * memslot->npages; + hva_t reg_end = hva + size; + + /* + * A memory region could potentially cover multiple VMAs, and any holes + * between them, so iterate over all of them to find out if we should + * unmap any of them. 
+ * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Take the intersection of this VMA with the memory region + */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (!(vma->vm_flags & VM_PFNMAP)) { + gpa_t gpa = addr + (vm_start - memslot->userspace_addr); + unmap_stage2_range(kvm, gpa, vm_end - vm_start); + } + hva = vm_end; + } while (hva < reg_end); +} + +/** + * stage2_unmap_vm - Unmap Stage-2 RAM mappings + * @kvm: The struct kvm pointer + * + * Go through the memregions and unmap any regular RAM + * backing memory already mapped to the VM. + */ +void stage2_unmap_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + down_read(¤t->mm->mmap_sem); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_unmap_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + up_read(¤t->mm->mmap_sem); + srcu_read_unlock(&kvm->srcu, idx); +} + +/** + * kvm_free_stage2_pgd - free all stage-2 tables + * @kvm: The KVM struct pointer for the VM. + * + * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all + * underlying level-2 and level-3 tables before freeing the actual level-1 table + * and setting the struct pointer to NULL. + */ +void kvm_free_stage2_pgd(struct kvm *kvm) +{ + void *pgd = NULL; + + spin_lock(&kvm->mmu_lock); + if (kvm->arch.pgd) { + unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); + pgd = READ_ONCE(kvm->arch.pgd); + kvm->arch.pgd = NULL; + kvm->arch.pgd_phys = 0; + } + spin_unlock(&kvm->mmu_lock); + + /* Free the HW pgd, one page at a time */ + if (pgd) + free_pages_exact(pgd, stage2_pgd_size(kvm)); +} + +static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) +{ + pgd_t *pgd; + pud_t *pud; + + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + if (stage2_pgd_none(kvm, *pgd)) { + if (!cache) + return NULL; + pud = mmu_memory_cache_alloc(cache); + stage2_pgd_populate(kvm, pgd, pud); + get_page(virt_to_page(pgd)); + } + + return stage2_pud_offset(kvm, pgd, addr); +} + +static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) +{ + pud_t *pud; + pmd_t *pmd; + + pud = stage2_get_pud(kvm, cache, addr); + if (!pud || stage2_pud_huge(kvm, *pud)) + return NULL; + + if (stage2_pud_none(kvm, *pud)) { + if (!cache) + return NULL; + pmd = mmu_memory_cache_alloc(cache); + stage2_pud_populate(kvm, pud, pmd); + get_page(virt_to_page(pud)); + } + + return stage2_pmd_offset(kvm, pud, addr); +} + +static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache + *cache, phys_addr_t addr, const pmd_t *new_pmd) +{ + pmd_t *pmd, old_pmd; + +retry: + pmd = stage2_get_pmd(kvm, cache, addr); + VM_BUG_ON(!pmd); + + old_pmd = *pmd; + /* + * Multiple vcpus faulting on the same PMD entry, can + * lead to them sequentially updating the PMD with the + * same value. Following the break-before-make + * (pmd_clear() followed by tlb_flush()) process can + * hinder forward progress due to refaults generated + * on missing translations. 
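stage2_unmap_memslot() above clips the memslot's userspace range against each VMA it overlaps and only unmaps the intersections, skipping any holes. A standalone sketch of that clipping loop over a hard-coded VMA list (all addresses are fabricated):

#include <stdint.h>
#include <stdio.h>

struct vma { uint64_t start, end; };

int main(void)
{
    /* Three mappings with a hole between the second and third. */
    struct vma vmas[] = { { 0x1000, 0x5000 }, { 0x5000, 0x8000 }, { 0x9000, 0xc000 } };
    uint64_t hva = 0x2000, reg_end = 0xb000;    /* the memory region to unmap */

    for (unsigned i = 0; i < 3 && hva < reg_end; i++) {
        if (vmas[i].end <= hva || vmas[i].start >= reg_end)
            continue;                            /* no overlap with this VMA */

        uint64_t vm_start = hva > vmas[i].start ? hva : vmas[i].start;
        uint64_t vm_end   = reg_end < vmas[i].end ? reg_end : vmas[i].end;

        printf("unmap [%#llx, %#llx)\n",
               (unsigned long long)vm_start, (unsigned long long)vm_end);
        hva = vm_end;                            /* continue after this VMA */
    }
    return 0;
}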
+ * + * Skip updating the page table if the entry is + * unchanged. + */ + if (pmd_val(old_pmd) == pmd_val(*new_pmd)) + return 0; + + if (pmd_present(old_pmd)) { + /* + * If we already have PTE level mapping for this block, + * we must unmap it to avoid inconsistent TLB state and + * leaking the table page. We could end up in this situation + * if the memory slot was marked for dirty logging and was + * reverted, leaving PTE level mappings for the pages accessed + * during the period. So, unmap the PTE level mapping for this + * block and retry, as we could have released the upper level + * table in the process. + * + * Normal THP split/merge follows mmu_notifier callbacks and do + * get handled accordingly. + */ + if (!pmd_thp_or_huge(old_pmd)) { + unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE); + goto retry; + } + /* + * Mapping in huge pages should only happen through a + * fault. If a page is merged into a transparent huge + * page, the individual subpages of that huge page + * should be unmapped through MMU notifiers before we + * get here. + * + * Merging of CompoundPages is not supported; they + * should become splitting first, unmapped, merged, + * and mapped back in on-demand. + */ + WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + get_page(virt_to_page(pmd)); + } + + kvm_set_pmd(pmd, *new_pmd); + return 0; +} + +static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pud_t *new_pudp) +{ + pud_t *pudp, old_pud; + +retry: + pudp = stage2_get_pud(kvm, cache, addr); + VM_BUG_ON(!pudp); + + old_pud = *pudp; + + /* + * A large number of vcpus faulting on the same stage 2 entry, + * can lead to a refault due to the stage2_pud_clear()/tlb_flush(). + * Skip updating the page tables if there is no change. + */ + if (pud_val(old_pud) == pud_val(*new_pudp)) + return 0; + + if (stage2_pud_present(kvm, old_pud)) { + /* + * If we already have table level mapping for this block, unmap + * the range for this block and retry. + */ + if (!stage2_pud_huge(kvm, old_pud)) { + unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE); + goto retry; + } + + WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp)); + stage2_pud_clear(kvm, pudp); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + get_page(virt_to_page(pudp)); + } + + kvm_set_pud(pudp, *new_pudp); + return 0; +} + +/* + * stage2_get_leaf_entry - walk the stage2 VM page tables and return + * true if a valid and present leaf-entry is found. A pointer to the + * leaf-entry is returned in the appropriate level variable - pudpp, + * pmdpp, ptepp. 
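The huge-mapping installers above follow break-before-make: an existing entry is cleared and the TLB invalidated before the replacement is written, and the write is skipped entirely when nothing would change. A much-simplified userspace model of that decision flow (the fake entry and print-out TLB flush are only for illustration):

#include <stdint.h>
#include <stdio.h>

#define ENTRY_VALID (1ULL << 0)

static uint64_t entry;                     /* stands in for the stage-2 PMD/PUD */

static void tlb_flush(void) { puts("  tlb invalidated"); }

static void set_block_entry(uint64_t new_entry)
{
    if (entry == new_entry) {              /* several vCPUs racing on one fault */
        puts("  unchanged, skip");
        return;
    }
    if (entry & ENTRY_VALID) {             /* break ... */
        entry = 0;
        tlb_flush();
    }
    entry = new_entry;                     /* ... before make */
    puts("  installed new mapping");
}

int main(void)
{
    puts("first fault:");      set_block_entry(0x40000000 | ENTRY_VALID);
    puts("same fault again:"); set_block_entry(0x40000000 | ENTRY_VALID);
    puts("changed mapping:");  set_block_entry(0x80000000 | ENTRY_VALID);
    return 0;
}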
+ */ +static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, + pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp) +{ + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + *pudpp = NULL; + *pmdpp = NULL; + *ptepp = NULL; + + pudp = stage2_get_pud(kvm, NULL, addr); + if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp)) + return false; + + if (stage2_pud_huge(kvm, *pudp)) { + *pudpp = pudp; + return true; + } + + pmdp = stage2_pmd_offset(kvm, pudp, addr); + if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp)) + return false; + + if (pmd_thp_or_huge(*pmdp)) { + *pmdpp = pmdp; + return true; + } + + ptep = pte_offset_kernel(pmdp, addr); + if (!ptep || pte_none(*ptep) || !pte_present(*ptep)) + return false; + + *ptepp = ptep; + return true; +} + +static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr) +{ + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + bool found; + + found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep); + if (!found) + return false; + + if (pudp) + return kvm_s2pud_exec(pudp); + else if (pmdp) + return kvm_s2pmd_exec(pmdp); + else + return kvm_s2pte_exec(ptep); +} + +static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pte_t *new_pte, + unsigned long flags) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte, old_pte; + bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP; + bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE; + + VM_BUG_ON(logging_active && !cache); + + /* Create stage-2 page table mapping - Levels 0 and 1 */ + pud = stage2_get_pud(kvm, cache, addr); + if (!pud) { + /* + * Ignore calls from kvm_set_spte_hva for unallocated + * address ranges. + */ + return 0; + } + + /* + * While dirty page logging - dissolve huge PUD, then continue + * on to allocate page. + */ + if (logging_active) + stage2_dissolve_pud(kvm, addr, pud); + + if (stage2_pud_none(kvm, *pud)) { + if (!cache) + return 0; /* ignore calls from kvm_set_spte_hva */ + pmd = mmu_memory_cache_alloc(cache); + stage2_pud_populate(kvm, pud, pmd); + get_page(virt_to_page(pud)); + } + + pmd = stage2_pmd_offset(kvm, pud, addr); + if (!pmd) { + /* + * Ignore calls from kvm_set_spte_hva for unallocated + * address ranges. + */ + return 0; + } + + /* + * While dirty page logging - dissolve huge PMD, then continue on to + * allocate page. 
+ */ + if (logging_active) + stage2_dissolve_pmd(kvm, addr, pmd); + + /* Create stage-2 page mappings - Level 2 */ + if (pmd_none(*pmd)) { + if (!cache) + return 0; /* ignore calls from kvm_set_spte_hva */ + pte = mmu_memory_cache_alloc(cache); + kvm_pmd_populate(pmd, pte); + get_page(virt_to_page(pmd)); + } + + pte = pte_offset_kernel(pmd, addr); + + if (iomap && pte_present(*pte)) + return -EFAULT; + + /* Create 2nd stage page table mapping - Level 3 */ + old_pte = *pte; + if (pte_present(old_pte)) { + /* Skip page table update if there is no change */ + if (pte_val(old_pte) == pte_val(*new_pte)) + return 0; + + kvm_set_pte(pte, __pte(0)); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + get_page(virt_to_page(pte)); + } + + kvm_set_pte(pte, *new_pte); + return 0; +} + +#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static int stage2_ptep_test_and_clear_young(pte_t *pte) +{ + if (pte_young(*pte)) { + *pte = pte_mkold(*pte); + return 1; + } + return 0; +} +#else +static int stage2_ptep_test_and_clear_young(pte_t *pte) +{ + return __ptep_test_and_clear_young(pte); +} +#endif + +static int stage2_pmdp_test_and_clear_young(pmd_t *pmd) +{ + return stage2_ptep_test_and_clear_young((pte_t *)pmd); +} + +static int stage2_pudp_test_and_clear_young(pud_t *pud) +{ + return stage2_ptep_test_and_clear_young((pte_t *)pud); +} + +/** + * kvm_phys_addr_ioremap - map a device range to guest IPA + * + * @kvm: The KVM pointer + * @guest_ipa: The IPA at which to insert the mapping + * @pa: The physical address of the device + * @size: The size of the mapping + */ +int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, + phys_addr_t pa, unsigned long size, bool writable) +{ + phys_addr_t addr, end; + int ret = 0; + unsigned long pfn; + struct kvm_mmu_memory_cache cache = { 0, }; + + end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK; + pfn = __phys_to_pfn(pa); + + for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) { + pte_t pte = kvm_pfn_pte(pfn, PAGE_S2_DEVICE); + + if (writable) + pte = kvm_s2pte_mkwrite(pte); + + ret = mmu_topup_memory_cache(&cache, + kvm_mmu_cache_min_pages(kvm), + KVM_NR_MEM_OBJS); + if (ret) + goto out; + spin_lock(&kvm->mmu_lock); + ret = stage2_set_pte(kvm, &cache, addr, &pte, + KVM_S2PTE_FLAG_IS_IOMAP); + spin_unlock(&kvm->mmu_lock); + if (ret) + goto out; + + pfn++; + } + +out: + mmu_free_memory_cache(&cache); + return ret; +} + +/** + * stage2_wp_ptes - write protect PMD range + * @pmd: pointer to pmd entry + * @addr: range start address + * @end: range end address + */ +static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + if (!kvm_s2pte_readonly(pte)) + kvm_set_s2pte_readonly(pte); + } + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +/** + * stage2_wp_pmds - write protect PUD range + * kvm: kvm instance for the VM + * @pud: pointer to pud entry + * @addr: range start address + * @end: range end address + */ +static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) +{ + pmd_t *pmd; + phys_addr_t next; + + pmd = stage2_pmd_offset(kvm, pud, addr); + + do { + next = stage2_pmd_addr_end(kvm, addr, end); + if (!pmd_none(*pmd)) { + if (pmd_thp_or_huge(*pmd)) { + if (!kvm_s2pmd_readonly(pmd)) + kvm_set_s2pmd_readonly(pmd); + } else { + stage2_wp_ptes(pmd, addr, next); + } + } + } while (pmd++, addr = next, addr != end); +} + +/** + * stage2_wp_puds - write protect PGD range + * @pgd: pointer to pgd entry + * @addr: 
range start address + * @end: range end address + */ +static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + pud_t *pud; + phys_addr_t next; + + pud = stage2_pud_offset(kvm, pgd, addr); + do { + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { + if (stage2_pud_huge(kvm, *pud)) { + if (!kvm_s2pud_readonly(pud)) + kvm_set_s2pud_readonly(pud); + } else { + stage2_wp_pmds(kvm, pud, addr, next); + } + } + } while (pud++, addr = next, addr != end); +} + +/** + * stage2_wp_range() - write protect stage2 memory region range + * @kvm: The KVM pointer + * @addr: Start address of range + * @end: End address of range + */ +static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) +{ + pgd_t *pgd; + phys_addr_t next; + + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + do { + /* + * Release kvm_mmu_lock periodically if the memory region is + * large. Otherwise, we may see kernel panics with + * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR, + * CONFIG_LOCKDEP. Additionally, holding the lock too long + * will also starve other vCPUs. We have to also make sure + * that the page tables are not freed while we released + * the lock. + */ + cond_resched_lock(&kvm->mmu_lock); + if (!READ_ONCE(kvm->arch.pgd)) + break; + next = stage2_pgd_addr_end(kvm, addr, end); + if (stage2_pgd_present(kvm, *pgd)) + stage2_wp_puds(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +/** + * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot + * @kvm: The KVM pointer + * @slot: The memory slot to write protect + * + * Called to start logging dirty pages after memory region + * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns + * all present PUD, PMD and PTEs are write protected in the memory region. + * Afterwards read of dirty page log can be called. + * + * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired, + * serializing operations for VM memory regions. + */ +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) +{ + struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); + phys_addr_t start, end; + + if (WARN_ON_ONCE(!memslot)) + return; + + start = memslot->base_gfn << PAGE_SHIFT; + end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; + + spin_lock(&kvm->mmu_lock); + stage2_wp_range(kvm, start, end); + spin_unlock(&kvm->mmu_lock); + kvm_flush_remote_tlbs(kvm); +} + +/** + * kvm_mmu_write_protect_pt_masked() - write protect dirty pages + * @kvm: The KVM pointer + * @slot: The memory slot associated with mask + * @gfn_offset: The gfn offset in memory slot + * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory + * slot to be write protected + * + * Walks bits set in mask write protects the associated pte's. Caller must + * acquire kvm_mmu_lock. + */ +static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + phys_addr_t base_gfn = slot->base_gfn + gfn_offset; + phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; + phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; + + stage2_wp_range(kvm, start, end); +} + +/* + * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected + * dirty pages. + * + * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to + * enable dirty logging for them. 
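 + [Editor's note, illustrative sketch, not part of this commit: this masked write-protect is driven by the generic dirty-log code when userspace retrieves the log. A rough userspace sequence, with vm_fd, region, slot and bitmap as placeholders:]
 +
 +	/* 1. enable dirty logging on the memslot */
 +	region.flags |= KVM_MEM_LOG_DIRTY_PAGES;
 +	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 +
 +	/* 2. periodically fetch the dirty bitmap */
 +	struct kvm_dirty_log log = {
 +		.slot = slot,
 +		.dirty_bitmap = bitmap,
 +	};
 +	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 +	/* KVM re-write-protects the pages it reports dirty via the masked path above */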
+ */ +void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); +} + +static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) +{ + __clean_dcache_guest_page(pfn, size); +} + +static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) +{ + __invalidate_icache_guest_page(pfn, size); +} + +static void kvm_send_hwpoison_signal(unsigned long address, short lsb) +{ + send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current); +} + +static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot, + unsigned long hva, + unsigned long map_size) +{ + gpa_t gpa_start; + hva_t uaddr_start, uaddr_end; + size_t size; + + /* The memslot and the VMA are guaranteed to be aligned to PAGE_SIZE */ + if (map_size == PAGE_SIZE) + return true; + + size = memslot->npages * PAGE_SIZE; + + gpa_start = memslot->base_gfn << PAGE_SHIFT; + + uaddr_start = memslot->userspace_addr; + uaddr_end = uaddr_start + size; + + /* + * Pages belonging to memslots that don't have the same alignment + * within a PMD/PUD for userspace and IPA cannot be mapped with stage-2 + * PMD/PUD entries, because we'll end up mapping the wrong pages. + * + * Consider a layout like the following: + * + * memslot->userspace_addr: + * +-----+--------------------+--------------------+---+ + * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz| + * +-----+--------------------+--------------------+---+ + * + * memslot->base_gfn << PAGE_SHIFT: + * +---+--------------------+--------------------+-----+ + * |abc|def Stage-2 block | Stage-2 block |tvxyz| + * +---+--------------------+--------------------+-----+ + * + * If we create those stage-2 blocks, we'll end up with this incorrect + * mapping: + * d -> f + * e -> g + * f -> h + */ + if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1))) + return false; + + /* + * Next, let's make sure we're not trying to map anything not covered + * by the memslot. This means we have to prohibit block size mappings + * for the beginning and end of a non-block aligned and non-block sized + * memory slot (illustrated by the head and tail parts of the + * userspace view above containing pages 'abcde' and 'xyz', + * respectively). + * + * Note that it doesn't matter if we do the check using the + * userspace_addr or the base_gfn, as both are equally aligned (per + * the check above) and equally sized. + */ + return (hva & ~(map_size - 1)) >= uaddr_start && + (hva & ~(map_size - 1)) + map_size <= uaddr_end; +} + +/* + * Check if the given hva is backed by a transparent huge page (THP) and + * whether it can be mapped using block mapping in stage2. If so, adjust + * the stage2 PFN and IPA accordingly. Only PMD_SIZE THPs are currently + * supported. This will need to be updated to support other THP sizes. + * + * Returns the size of the mapping. + */ +static unsigned long +transparent_hugepage_adjust(struct kvm_memory_slot *memslot, + unsigned long hva, kvm_pfn_t *pfnp, + phys_addr_t *ipap) +{ + kvm_pfn_t pfn = *pfnp; + + /* + * Make sure the adjustment is done only for THP pages. Also make + * sure that the HVA and IPA are sufficiently aligned and that the + * block map is contained within the memslot. + */ + if (kvm_is_transparent_hugepage(pfn) && + fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { + /* + * The address we faulted on is backed by a transparent huge + * page. 
However, because we map the compound huge page and + * not the individual tail page, we need to transfer the + * refcount to the head page. We have to be careful that the + * THP doesn't start to split while we are adjusting the + * refcounts. + * + * We are sure this doesn't happen, because mmu_notifier_retry + * was successful and we are holding the mmu_lock, so if this + * THP is trying to split, it will be blocked in the mmu + * notifier before touching any of the pages, specifically + * before being able to call __split_huge_page_refcount(). + * + * We can therefore safely transfer the refcount from PG_tail + * to PG_head and switch the pfn from a tail page to the head + * page accordingly. + */ + *ipap &= PMD_MASK; + kvm_release_pfn_clean(pfn); + pfn &= ~(PTRS_PER_PMD - 1); + kvm_get_pfn(pfn); + *pfnp = pfn; + + return PMD_SIZE; + } + + /* Use page mapping if we cannot use block mapping. */ + return PAGE_SIZE; +} + +static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + struct kvm_memory_slot *memslot, unsigned long hva, + unsigned long fault_status) +{ + int ret; + bool write_fault, writable, force_pte = false; + bool exec_fault, needs_exec; + unsigned long mmu_seq; + gfn_t gfn = fault_ipa >> PAGE_SHIFT; + struct kvm *kvm = vcpu->kvm; + struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + struct vm_area_struct *vma; + short vma_shift; + kvm_pfn_t pfn; + pgprot_t mem_type = PAGE_S2; + bool logging_active = memslot_is_logging(memslot); + unsigned long vma_pagesize, flags = 0; + + write_fault = kvm_is_write_fault(vcpu); + exec_fault = kvm_vcpu_trap_is_iabt(vcpu); + VM_BUG_ON(write_fault && exec_fault); + + if (fault_status == FSC_PERM && !write_fault && !exec_fault) { + kvm_err("Unexpected L2 read permission error\n"); + return -EFAULT; + } + + /* Let's check if we will get back a huge page backed by hugetlbfs */ + down_read(¤t->mm->mmap_sem); + vma = find_vma_intersection(current->mm, hva, hva + 1); + if (unlikely(!vma)) { + kvm_err("Failed to find VMA for hva 0x%lx\n", hva); + up_read(¤t->mm->mmap_sem); + return -EFAULT; + } + + if (is_vm_hugetlb_page(vma)) + vma_shift = huge_page_shift(hstate_vma(vma)); + else + vma_shift = PAGE_SHIFT; + + vma_pagesize = 1ULL << vma_shift; + if (logging_active || + (vma->vm_flags & VM_PFNMAP) || + !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) { + force_pte = true; + vma_pagesize = PAGE_SIZE; + } + + /* + * The stage2 has a minimum of 2 level table (For arm64 see + * kvm_arm_setup_stage2()). Hence, we are guaranteed that we can + * use PMD_SIZE huge mappings (even when the PMD is folded into PGD). + * As for PUD huge maps, we must make sure that we have at least + * 3 levels, i.e, PMD is not folded. + */ + if (vma_pagesize == PMD_SIZE || + (vma_pagesize == PUD_SIZE && kvm_stage2_has_pmd(kvm))) + gfn = (fault_ipa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT; + up_read(¤t->mm->mmap_sem); + + /* We need minimum second+third level pages */ + ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm), + KVM_NR_MEM_OBJS); + if (ret) + return ret; + + mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* + * Ensure the read of mmu_notifier_seq happens before we call + * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk + * the page we just got a reference to gets unmapped before we have a + * chance to grab the mmu_lock, which ensure that if the page gets + * unmapped afterwards, the call to kvm_unmap_hva will take it away + * from us again properly. 
This smp_rmb() interacts with the smp_wmb() + * in kvm_mmu_notifier_invalidate_<page|range_end>. + */ + smp_rmb(); + + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); + if (pfn == KVM_PFN_ERR_HWPOISON) { + kvm_send_hwpoison_signal(hva, vma_shift); + return 0; + } + if (is_error_noslot_pfn(pfn)) + return -EFAULT; + + if (kvm_is_device_pfn(pfn)) { + mem_type = PAGE_S2_DEVICE; + flags |= KVM_S2PTE_FLAG_IS_IOMAP; + } else if (logging_active) { + /* + * Faults on pages in a memslot with logging enabled + * should not be mapped with huge pages (it introduces churn + * and performance degradation), so force a pte mapping. + */ + flags |= KVM_S2_FLAG_LOGGING_ACTIVE; + + /* + * Only actually map the page as writable if this was a write + * fault. + */ + if (!write_fault) + writable = false; + } + + if (exec_fault && is_iomap(flags)) + return -ENOEXEC; + + spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(kvm, mmu_seq)) + goto out_unlock; + + /* + * If we are not forced to use page mapping, check if we are + * backed by a THP and thus use block mapping if possible. + */ + if (vma_pagesize == PAGE_SIZE && !force_pte) + vma_pagesize = transparent_hugepage_adjust(memslot, hva, + &pfn, &fault_ipa); + if (writable) + kvm_set_pfn_dirty(pfn); + + if (fault_status != FSC_PERM && !is_iomap(flags)) + clean_dcache_guest_page(pfn, vma_pagesize); + + if (exec_fault) + invalidate_icache_guest_page(pfn, vma_pagesize); + + /* + * If we took an execution fault we have made the + * icache/dcache coherent above and should now let the s2 + * mapping be executable. + * + * Write faults (!exec_fault && FSC_PERM) are orthogonal to + * execute permissions, and we preserve whatever we have. + */ + needs_exec = exec_fault || + (fault_status == FSC_PERM && stage2_is_exec(kvm, fault_ipa)); + + if (vma_pagesize == PUD_SIZE) { + pud_t new_pud = kvm_pfn_pud(pfn, mem_type); + + new_pud = kvm_pud_mkhuge(new_pud); + if (writable) + new_pud = kvm_s2pud_mkwrite(new_pud); + + if (needs_exec) + new_pud = kvm_s2pud_mkexec(new_pud); + + ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud); + } else if (vma_pagesize == PMD_SIZE) { + pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type); + + new_pmd = kvm_pmd_mkhuge(new_pmd); + + if (writable) + new_pmd = kvm_s2pmd_mkwrite(new_pmd); + + if (needs_exec) + new_pmd = kvm_s2pmd_mkexec(new_pmd); + + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + } else { + pte_t new_pte = kvm_pfn_pte(pfn, mem_type); + + if (writable) { + new_pte = kvm_s2pte_mkwrite(new_pte); + mark_page_dirty(kvm, gfn); + } + + if (needs_exec) + new_pte = kvm_s2pte_mkexec(new_pte); + + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); + } + +out_unlock: + spin_unlock(&kvm->mmu_lock); + kvm_set_pfn_accessed(pfn); + kvm_release_pfn_clean(pfn); + return ret; +} + +/* + * Resolve the access fault by making the page young again. + * Note that because the faulting entry is guaranteed not to be + * cached in the TLB, we don't need to invalidate anything. + * Only the HW Access Flag updates are supported for Stage 2 (no DBM), + * so there is no need for atomic (pte|pmd)_mkyoung operations. 
+ */ +static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + kvm_pfn_t pfn; + bool pfn_valid = false; + + trace_kvm_access_fault(fault_ipa); + + spin_lock(&vcpu->kvm->mmu_lock); + + if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte)) + goto out; + + if (pud) { /* HugeTLB */ + *pud = kvm_s2pud_mkyoung(*pud); + pfn = kvm_pud_pfn(*pud); + pfn_valid = true; + } else if (pmd) { /* THP, HugeTLB */ + *pmd = pmd_mkyoung(*pmd); + pfn = pmd_pfn(*pmd); + pfn_valid = true; + } else { + *pte = pte_mkyoung(*pte); /* Just a page... */ + pfn = pte_pfn(*pte); + pfn_valid = true; + } + +out: + spin_unlock(&vcpu->kvm->mmu_lock); + if (pfn_valid) + kvm_set_pfn_accessed(pfn); +} + +/** + * kvm_handle_guest_abort - handles all 2nd stage aborts + * @vcpu: the VCPU pointer + * @run: the kvm_run structure + * + * Any abort that gets to the host is almost guaranteed to be caused by a + * missing second stage translation table entry, which can mean that either the + * guest simply needs more memory and we must allocate an appropriate page or it + * can mean that the guest tried to access I/O memory, which is emulated by user + * space. The distinction is based on the IPA causing the fault and whether this + * memory region has been registered as standard RAM by user space. + */ +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + unsigned long fault_status; + phys_addr_t fault_ipa; + struct kvm_memory_slot *memslot; + unsigned long hva; + bool is_iabt, write_fault, writable; + gfn_t gfn; + int ret, idx; + + fault_status = kvm_vcpu_trap_get_fault_type(vcpu); + + fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); + + /* Synchronous External Abort? */ + if (kvm_vcpu_dabt_isextabt(vcpu)) { + /* + * For RAS the host kernel may handle this abort. + * There is no need to pass the error into the guest. + */ + if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) + return 1; + + if (unlikely(!is_iabt)) { + kvm_inject_vabt(vcpu); + return 1; + } + } + + trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), + kvm_vcpu_get_hfar(vcpu), fault_ipa); + + /* Check the stage-2 fault is trans. fault or write fault */ + if (fault_status != FSC_FAULT && fault_status != FSC_PERM && + fault_status != FSC_ACCESS) { + kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", + kvm_vcpu_trap_get_class(vcpu), + (unsigned long)kvm_vcpu_trap_get_fault(vcpu), + (unsigned long)kvm_vcpu_get_hsr(vcpu)); + return -EFAULT; + } + + idx = srcu_read_lock(&vcpu->kvm->srcu); + + gfn = fault_ipa >> PAGE_SHIFT; + memslot = gfn_to_memslot(vcpu->kvm, gfn); + hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); + write_fault = kvm_is_write_fault(vcpu); + if (kvm_is_error_hva(hva) || (write_fault && !writable)) { + if (is_iabt) { + /* Prefetch Abort on I/O address */ + ret = -ENOEXEC; + goto out; + } + + /* + * Check for a cache maintenance operation. Since we + * ended-up here, we know it is outside of any memory + * slot. But we can't find out if that is for a device, + * or if the guest is just being stupid. The only thing + * we know for sure is that this range cannot be cached. + * + * So let's assume that the guest is just being + * cautious, and skip the instruction. 
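 + [Editor's note, worked example, not part of this commit: the hardware reports the faulting IPA only down to page granularity, so the byte offset is recovered from the faulting VA in the fix-up a few lines below. For instance, a page-aligned fault_ipa of 0x10012000 with VA low bits 0x3c4 gives the MMIO emulation address 0x100123c4:]
 +
 +	fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
 +	/* 0x10012000 | 0x3c4 == 0x100123c4 */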
+ */ + if (kvm_vcpu_dabt_is_cm(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + ret = 1; + goto out_unlock; + } + + /* + * The IPA is reported as [MAX:12], so we need to + * complement it with the bottom 12 bits from the + * faulting VA. This is always 12 bits, irrespective + * of the page size. + */ + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1); + ret = io_mem_abort(vcpu, run, fault_ipa); + goto out_unlock; + } + + /* Userspace should not be able to register out-of-bounds IPAs */ + VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm)); + + if (fault_status == FSC_ACCESS) { + handle_access_fault(vcpu, fault_ipa); + ret = 1; + goto out_unlock; + } + + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); + if (ret == 0) + ret = 1; +out: + if (ret == -ENOEXEC) { + kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + ret = 1; + } +out_unlock: + srcu_read_unlock(&vcpu->kvm->srcu, idx); + return ret; +} + +static int handle_hva_to_gpa(struct kvm *kvm, + unsigned long start, + unsigned long end, + int (*handler)(struct kvm *kvm, + gpa_t gpa, u64 size, + void *data), + void *data) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int ret = 0; + + slots = kvm_memslots(kvm); + + /* we only care about the pages that the guest sees */ + kvm_for_each_memslot(memslot, slots) { + unsigned long hva_start, hva_end; + gfn_t gpa; + + hva_start = max(start, memslot->userspace_addr); + hva_end = min(end, memslot->userspace_addr + + (memslot->npages << PAGE_SHIFT)); + if (hva_start >= hva_end) + continue; + + gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT; + ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data); + } + + return ret; +} + +static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) +{ + unmap_stage2_range(kvm, gpa, size); + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end) +{ + if (!kvm->arch.pgd) + return 0; + + trace_kvm_unmap_hva_range(start, end); + handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); + return 0; +} + +static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) +{ + pte_t *pte = (pte_t *)data; + + WARN_ON(size != PAGE_SIZE); + /* + * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE + * flag clear because MMU notifiers will have unmapped a huge PMD before + * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and + * therefore stage2_set_pte() never needs to clear out a huge PMD + * through this calling path. + */ + stage2_set_pte(kvm, NULL, gpa, pte, 0); + return 0; +} + + +int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + unsigned long end = hva + PAGE_SIZE; + kvm_pfn_t pfn = pte_pfn(pte); + pte_t stage2_pte; + + if (!kvm->arch.pgd) + return 0; + + trace_kvm_set_spte_hva(hva); + + /* + * We've moved a page around, probably through CoW, so let's treat it + * just like a translation fault and clean the cache to the PoC. 
+ */ + clean_dcache_guest_page(pfn, PAGE_SIZE); + stage2_pte = kvm_pfn_pte(pfn, PAGE_S2); + handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte); + + return 0; +} + +static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); + if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) + return 0; + + if (pud) + return stage2_pudp_test_and_clear_young(pud); + else if (pmd) + return stage2_pmdp_test_and_clear_young(pmd); + else + return stage2_ptep_test_and_clear_young(pte); +} + +static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); + if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) + return 0; + + if (pud) + return kvm_s2pud_young(*pud); + else if (pmd) + return pmd_young(*pmd); + else + return pte_young(*pte); +} + +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +{ + if (!kvm->arch.pgd) + return 0; + trace_kvm_age_hva(start, end); + return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + if (!kvm->arch.pgd) + return 0; + trace_kvm_test_age_hva(hva); + return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, + kvm_test_age_hva_handler, NULL); +} + +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) +{ + mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); +} + +phys_addr_t kvm_mmu_get_httbr(void) +{ + if (__kvm_cpu_uses_extended_idmap()) + return virt_to_phys(merged_hyp_pgd); + else + return virt_to_phys(hyp_pgd); +} + +phys_addr_t kvm_get_idmap_vector(void) +{ + return hyp_idmap_vector; +} + +static int kvm_map_idmap_text(pgd_t *pgd) +{ + int err; + + /* Create the idmap in the boot page tables */ + err = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(), + hyp_idmap_start, hyp_idmap_end, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP_EXEC); + if (err) + kvm_err("Failed to idmap %lx-%lx\n", + hyp_idmap_start, hyp_idmap_end); + + return err; +} + +int kvm_mmu_init(void) +{ + int err; + + hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start); + hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); + hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end); + hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE); + hyp_idmap_vector = __pa_symbol(__kvm_hyp_init); + + /* + * We rely on the linker script to ensure at build time that the HYP + * init code does not cross a page boundary. + */ + BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); + + kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); + kvm_debug("HYP VA range: %lx:%lx\n", + kern_hyp_va(PAGE_OFFSET), + kern_hyp_va((unsigned long)high_memory - 1)); + + if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && + hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) && + hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) { + /* + * The idmap page is intersecting with the VA space, + * it is not safe to continue further. 
+ */ + kvm_err("IDMAP intersecting with HYP VA, unable to continue\n"); + err = -EINVAL; + goto out; + } + + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); + if (!hyp_pgd) { + kvm_err("Hyp mode PGD not allocated\n"); + err = -ENOMEM; + goto out; + } + + if (__kvm_cpu_uses_extended_idmap()) { + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + hyp_pgd_order); + if (!boot_hyp_pgd) { + kvm_err("Hyp boot PGD not allocated\n"); + err = -ENOMEM; + goto out; + } + + err = kvm_map_idmap_text(boot_hyp_pgd); + if (err) + goto out; + + merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!merged_hyp_pgd) { + kvm_err("Failed to allocate extra HYP pgd\n"); + goto out; + } + __kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd, + hyp_idmap_start); + } else { + err = kvm_map_idmap_text(hyp_pgd); + if (err) + goto out; + } + + io_map_base = hyp_idmap_start; + return 0; +out: + free_hyp_pgds(); + return err; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, + enum kvm_mr_change change) +{ + /* + * At this point memslot has been committed and there is an + * allocated dirty_bitmap[], dirty pages will be tracked while the + * memory slot is write protected. + */ + if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + /* + * If we're with initial-all-set, we don't need to write + * protect any pages because they're all reported as dirty. + * Huge pages and normal pages will be write protect gradually. + */ + if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) { + kvm_mmu_wp_memory_region(kvm, mem->slot); + } + } +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) +{ + hva_t hva = mem->userspace_addr; + hva_t reg_end = hva + mem->memory_size; + bool writable = !(mem->flags & KVM_MEM_READONLY); + int ret = 0; + + if (change != KVM_MR_CREATE && change != KVM_MR_MOVE && + change != KVM_MR_FLAGS_ONLY) + return 0; + + /* + * Prevent userspace from creating a memory region outside of the IPA + * space addressable by the KVM guest IPA space. + */ + if (memslot->base_gfn + memslot->npages >= + (kvm_phys_size(kvm) >> PAGE_SHIFT)) + return -EFAULT; + + down_read(¤t->mm->mmap_sem); + /* + * A memory region could potentially cover multiple VMAs, and any holes + * between them, so iterate over all of them to find out if we can map + * any of them right now. 
+ * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Take the intersection of this VMA with the memory region + */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (vma->vm_flags & VM_PFNMAP) { + gpa_t gpa = mem->guest_phys_addr + + (vm_start - mem->userspace_addr); + phys_addr_t pa; + + pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + pa += vm_start - vma->vm_start; + + /* IO region dirty page logging not allowed */ + if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) { + ret = -EINVAL; + goto out; + } + + ret = kvm_phys_addr_ioremap(kvm, gpa, pa, + vm_end - vm_start, + writable); + if (ret) + break; + } + hva = vm_end; + } while (hva < reg_end); + + if (change == KVM_MR_FLAGS_ONLY) + goto out; + + spin_lock(&kvm->mmu_lock); + if (ret) + unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); + else + stage2_flush_memslot(kvm, memslot); + spin_unlock(&kvm->mmu_lock); +out: + up_read(¤t->mm->mmap_sem); + return ret; +} + +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) +{ +} + +void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) +{ +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ + kvm_free_stage2_pgd(kvm); +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + gpa_t gpa = slot->base_gfn << PAGE_SHIFT; + phys_addr_t size = slot->npages << PAGE_SHIFT; + + spin_lock(&kvm->mmu_lock); + unmap_stage2_range(kvm, gpa, size); + spin_unlock(&kvm->mmu_lock); +} + +/* + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). + * + * Main problems: + * - S/W ops are local to a CPU (not broadcast) + * - We have line migration behind our back (speculation) + * - System caches don't support S/W at all (damn!) + * + * In the face of the above, the best we can do is to try and convert + * S/W ops to VA ops. Because the guest is not allowed to infer the + * S/W to PA mapping, it can only use S/W to nuke the whole cache, + * which is a rather good thing for us. + * + * Also, it is only used when turning caches on/off ("The expected + * usage of the cache maintenance instructions that operate by set/way + * is associated with the cache maintenance instructions associated + * with the powerdown and powerup of caches, if this is required by + * the implementation."). + * + * We use the following policy: + * + * - If we trap a S/W operation, we enable VM trapping to detect + * caches being turned on/off, and do a full clean. + * + * - We flush the caches on both caches being turned on and off. + * + * - Once the caches are enabled, we stop trapping VM ops. + */ +void kvm_set_way_flush(struct kvm_vcpu *vcpu) +{ + unsigned long hcr = *vcpu_hcr(vcpu); + + /* + * If this is the first time we do a S/W operation + * (i.e. HCR_TVM not set) flush the whole memory, and set the + * VM trapping. + * + * Otherwise, rely on the VM trapping to wait for the MMU + + * Caches to be turned off. At that point, we'll be able to + * clean the caches again. 
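 + [Editor's note, illustrative sketch of the trap sequence this policy implements, not part of this commit:]
 +
 +	/* guest (caches off):  DC CISW set/way op     -> trapped via HCR_EL2.TSW   */
 +	/*   kvm_set_way_flush():   stage2_flush_vm(), then set HCR_TVM             */
 +	/* guest:  writes SCTLR_EL1 to toggle MMU/caches -> trapped via HCR_EL2.TVM */
 +	/*   kvm_toggle_cache():    stage2_flush_vm() on the transition,            */
 +	/*                          clear HCR_TVM once the caches are on            */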
+ */ + if (!(hcr & HCR_TVM)) { + trace_kvm_set_way_flush(*vcpu_pc(vcpu), + vcpu_has_cache_enabled(vcpu)); + stage2_flush_vm(vcpu->kvm); + *vcpu_hcr(vcpu) = hcr | HCR_TVM; + } +} + +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) +{ + bool now_enabled = vcpu_has_cache_enabled(vcpu); + + /* + * If switching the MMU+caches on, need to invalidate the caches. + * If switching it off, need to clean the caches. + * Clean + invalidate does the trick always. + */ + if (now_enabled != was_enabled) + stage2_flush_vm(vcpu->kvm); + + /* Caches are now on, stop trapping VM ops (until a S/W op) */ + if (now_enabled) + *vcpu_hcr(vcpu) &= ~HCR_TVM; + + trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); +} diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c new file mode 100644 index 000000000000..d45b8b9a4415 --- /dev/null +++ b/arch/arm64/kvm/perf.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Based on the x86 implementation. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/perf_event.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> + +static int kvm_is_in_guest(void) +{ + return kvm_get_running_vcpu() != NULL; +} + +static int kvm_is_user_mode(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_running_vcpu(); + + if (vcpu) + return !vcpu_mode_priv(vcpu); + + return 0; +} + +static unsigned long kvm_get_guest_ip(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_running_vcpu(); + + if (vcpu) + return *vcpu_pc(vcpu); + + return 0; +} + +static struct perf_guest_info_callbacks kvm_guest_cbs = { + .is_in_guest = kvm_is_in_guest, + .is_user_mode = kvm_is_user_mode, + .get_guest_ip = kvm_get_guest_ip, +}; + +int kvm_perf_init(void) +{ + return perf_register_guest_info_callbacks(&kvm_guest_cbs); +} + +int kvm_perf_teardown(void) +{ + return perf_unregister_guest_info_callbacks(&kvm_guest_cbs); +} diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c new file mode 100644 index 000000000000..f0d0312c0a55 --- /dev/null +++ b/arch/arm64/kvm/pmu-emul.c @@ -0,0 +1,869 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 Linaro Ltd. 
+ * Author: Shannon Zhao <shannon.zhao@linaro.org> + */ + +#include <linux/cpu.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/perf_event.h> +#include <linux/perf/arm_pmu.h> +#include <linux/uaccess.h> +#include <asm/kvm_emulate.h> +#include <kvm/arm_pmu.h> +#include <kvm/arm_vgic.h> + +static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); + +#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1 + +/** + * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter + * @vcpu: The vcpu pointer + * @select_idx: The counter index + */ +static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) +{ + return (select_idx == ARMV8_PMU_CYCLE_IDX && + __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); +} + +static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) +{ + struct kvm_pmu *pmu; + struct kvm_vcpu_arch *vcpu_arch; + + pmc -= pmc->idx; + pmu = container_of(pmc, struct kvm_pmu, pmc[0]); + vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu); + return container_of(vcpu_arch, struct kvm_vcpu, arch); +} + +/** + * kvm_pmu_pmc_is_chained - determine if the pmc is chained + * @pmc: The PMU counter pointer + */ +static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc) +{ + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); + + return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); +} + +/** + * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter + * @select_idx: The counter index + */ +static bool kvm_pmu_idx_is_high_counter(u64 select_idx) +{ + return select_idx & 0x1; +} + +/** + * kvm_pmu_get_canonical_pmc - obtain the canonical pmc + * @pmc: The PMU counter pointer + * + * When a pair of PMCs are chained together we use the low counter (canonical) + * to hold the underlying perf event. + */ +static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc) +{ + if (kvm_pmu_pmc_is_chained(pmc) && + kvm_pmu_idx_is_high_counter(pmc->idx)) + return pmc - 1; + + return pmc; +} +static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc) +{ + if (kvm_pmu_idx_is_high_counter(pmc->idx)) + return pmc - 1; + else + return pmc + 1; +} + +/** + * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain + * @vcpu: The vcpu pointer + * @select_idx: The counter index + */ +static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx) +{ + u64 eventsel, reg; + + select_idx |= 0x1; + + if (select_idx == ARMV8_PMU_CYCLE_IDX) + return false; + + reg = PMEVTYPER0_EL0 + select_idx; + eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT; + + return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN; +} + +/** + * kvm_pmu_get_pair_counter_value - get PMU counter value + * @vcpu: The vcpu pointer + * @pmc: The PMU counter pointer + */ +static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, + struct kvm_pmc *pmc) +{ + u64 counter, counter_high, reg, enabled, running; + + if (kvm_pmu_pmc_is_chained(pmc)) { + pmc = kvm_pmu_get_canonical_pmc(pmc); + reg = PMEVCNTR0_EL0 + pmc->idx; + + counter = __vcpu_sys_reg(vcpu, reg); + counter_high = __vcpu_sys_reg(vcpu, reg + 1); + + counter = lower_32_bits(counter) | (counter_high << 32); + } else { + reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) + ? 
PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; + counter = __vcpu_sys_reg(vcpu, reg); + } + + /* + * The real counter value is equal to the value of counter register plus + * the value perf event counts. + */ + if (pmc->perf_event) + counter += perf_event_read_value(pmc->perf_event, &enabled, + &running); + + return counter; +} + +/** + * kvm_pmu_get_counter_value - get PMU counter value + * @vcpu: The vcpu pointer + * @select_idx: The counter index + */ +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) +{ + u64 counter; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + + if (kvm_pmu_pmc_is_chained(pmc) && + kvm_pmu_idx_is_high_counter(select_idx)) + counter = upper_32_bits(counter); + else if (select_idx != ARMV8_PMU_CYCLE_IDX) + counter = lower_32_bits(counter); + + return counter; +} + +/** + * kvm_pmu_set_counter_value - set PMU counter value + * @vcpu: The vcpu pointer + * @select_idx: The counter index + * @val: The counter value + */ +void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) +{ + u64 reg; + + reg = (select_idx == ARMV8_PMU_CYCLE_IDX) + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; + __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); + + /* Recreate the perf event to reflect the updated sample_period */ + kvm_pmu_create_perf_event(vcpu, select_idx); +} + +/** + * kvm_pmu_release_perf_event - remove the perf event + * @pmc: The PMU counter pointer + */ +static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) +{ + pmc = kvm_pmu_get_canonical_pmc(pmc); + if (pmc->perf_event) { + perf_event_disable(pmc->perf_event); + perf_event_release_kernel(pmc->perf_event); + pmc->perf_event = NULL; + } +} + +/** + * kvm_pmu_stop_counter - stop PMU counter + * @pmc: The PMU counter pointer + * + * If this counter has been configured to monitor some event, release it here. 
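 + [Editor's note, not part of this commit: a pair becomes "chained" when the guest programs the odd counter with the architectural CHAIN event (0x1E) and enables it; KVM then backs the even/odd pair with a single 64-bit perf event attached to the even (canonical) counter, which is what kvm_pmu_stop_counter() below and kvm_pmu_update_pmc_chained() further down have to unwind. Guest-side view, schematic only:]
 +
 +	/* PMEVTYPER<2n+1>_EL0 = 0x1E;           odd counter counts CHAIN      */
 +	/* PMEVTYPER<2n>_EL0   = <event>;        even counter counts the event */
 +	/* PMCNTENSET_EL0     |= BIT(2n) | BIT(2n + 1);                        */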
+ */ +static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) +{ + u64 counter, reg, val; + + pmc = kvm_pmu_get_canonical_pmc(pmc); + if (!pmc->perf_event) + return; + + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + + if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { + reg = PMCCNTR_EL0; + val = counter; + } else { + reg = PMEVCNTR0_EL0 + pmc->idx; + val = lower_32_bits(counter); + } + + __vcpu_sys_reg(vcpu, reg) = val; + + if (kvm_pmu_pmc_is_chained(pmc)) + __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); + + kvm_pmu_release_perf_event(pmc); +} + +/** + * kvm_pmu_vcpu_init - assign pmu counter idx for cpu + * @vcpu: The vcpu pointer + * + */ +void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) + pmu->pmc[i].idx = i; +} + +/** + * kvm_pmu_vcpu_reset - reset pmu state for cpu + * @vcpu: The vcpu pointer + * + */ +void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) +{ + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); + struct kvm_pmu *pmu = &vcpu->arch.pmu; + int i; + + for_each_set_bit(i, &mask, 32) + kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); + + bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); +} + +/** + * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu + * @vcpu: The vcpu pointer + * + */ +void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) + kvm_pmu_release_perf_event(&pmu->pmc[i]); +} + +u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) +{ + u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; + + val &= ARMV8_PMU_PMCR_N_MASK; + if (val == 0) + return BIT(ARMV8_PMU_CYCLE_IDX); + else + return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX); +} + +/** + * kvm_pmu_enable_counter_mask - enable selected PMU counters + * @vcpu: The vcpu pointer + * @val: the value guest writes to PMCNTENSET register + * + * Call perf_event_enable to start counting the perf event + */ +void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) +{ + int i; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc; + + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) + return; + + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + if (!(val & BIT(i))) + continue; + + pmc = &pmu->pmc[i]; + + /* A change in the enable state may affect the chain state */ + kvm_pmu_update_pmc_chained(vcpu, i); + kvm_pmu_create_perf_event(vcpu, i); + + /* At this point, pmc must be the canonical */ + if (pmc->perf_event) { + perf_event_enable(pmc->perf_event); + if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) + kvm_debug("fail to enable perf event\n"); + } + } +} + +/** + * kvm_pmu_disable_counter_mask - disable selected PMU counters + * @vcpu: The vcpu pointer + * @val: the value guest writes to PMCNTENCLR register + * + * Call perf_event_disable to stop counting the perf event + */ +void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) +{ + int i; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc; + + if (!val) + return; + + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + if (!(val & BIT(i))) + continue; + + pmc = &pmu->pmc[i]; + + /* A change in the enable state may affect the chain state */ + kvm_pmu_update_pmc_chained(vcpu, i); + kvm_pmu_create_perf_event(vcpu, i); + + /* At this point, pmc must be the canonical */ + if (pmc->perf_event) + perf_event_disable(pmc->perf_event); + } +} + +static u64 
kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) +{ + u64 reg = 0; + + if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { + reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); + reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); + reg &= kvm_pmu_valid_counter_mask(vcpu); + } + + return reg; +} + +static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + bool overflow; + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return; + + overflow = !!kvm_pmu_overflow_status(vcpu); + if (pmu->irq_level == overflow) + return; + + pmu->irq_level = overflow; + + if (likely(irqchip_in_kernel(vcpu->kvm))) { + int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + pmu->irq_num, overflow, pmu); + WARN_ON(ret); + } +} + +bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_sync_regs *sregs = &vcpu->run->s.regs; + bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU; + + if (likely(irqchip_in_kernel(vcpu->kvm))) + return false; + + return pmu->irq_level != run_level; +} + +/* + * Reflect the PMU overflow interrupt output level into the kvm_run structure + */ +void kvm_pmu_update_run(struct kvm_vcpu *vcpu) +{ + struct kvm_sync_regs *regs = &vcpu->run->s.regs; + + /* Populate the timer bitmap for user space */ + regs->device_irq_level &= ~KVM_ARM_DEV_PMU; + if (vcpu->arch.pmu.irq_level) + regs->device_irq_level |= KVM_ARM_DEV_PMU; +} + +/** + * kvm_pmu_flush_hwstate - flush pmu state to cpu + * @vcpu: The vcpu pointer + * + * Check if the PMU has overflowed while we were running in the host, and inject + * an interrupt if that was the case. + */ +void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) +{ + kvm_pmu_update_state(vcpu); +} + +/** + * kvm_pmu_sync_hwstate - sync pmu state from cpu + * @vcpu: The vcpu pointer + * + * Check if the PMU has overflowed while we were running in the guest, and + * inject an interrupt if that was the case. + */ +void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) +{ + kvm_pmu_update_state(vcpu); +} + +/** + * When the perf event overflows, set the overflow status and inform the vcpu. + */ +static void kvm_pmu_perf_overflow(struct perf_event *perf_event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct kvm_pmc *pmc = perf_event->overflow_handler_context; + struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu); + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); + int idx = pmc->idx; + u64 period; + + cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE); + + /* + * Reset the sample period to the architectural limit, + * i.e. the point where the counter overflows. 
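 + [Editor's note, worked example, not part of this commit: for a 32-bit counter the reload value is the two's complement of the current count, i.e. the number of events left until the low 32 bits wrap. With the count's low 32 bits at 0xffffff00, the reprogrammed sample period is 0x100, so the overflow callback fires again after 256 more events and sets the counter's PMOVSSET bit:]
 +
 +	period = -(local64_read(&perf_event->count)) & GENMASK(31, 0);
 +	/* low 32 bits == 0xffffff00  ->  period == 0x100 */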
+ */ + period = -(local64_read(&perf_event->count)); + + if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) + period &= GENMASK(31, 0); + + local64_set(&perf_event->hw.period_left, 0); + perf_event->attr.sample_period = period; + perf_event->hw.sample_period = period; + + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + + if (kvm_pmu_overflow_status(vcpu)) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + } + + cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD); +} + +/** + * kvm_pmu_software_increment - do software increment + * @vcpu: The vcpu pointer + * @val: the value guest writes to PMSWINC register + */ +void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + int i; + + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) + return; + + /* Weed out disabled counters */ + val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + + for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { + u64 type, reg; + + if (!(val & BIT(i))) + continue; + + /* PMSWINC only applies to ... SW_INC! */ + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); + type &= ARMV8_PMU_EVTYPE_EVENT; + if (type != ARMV8_PMUV3_PERFCTR_SW_INCR) + continue; + + /* increment this even SW_INC counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = lower_32_bits(reg); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + + if (reg) /* no overflow on the low part */ + continue; + + if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) { + /* increment the high counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1; + reg = lower_32_bits(reg); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg; + if (!reg) /* mark overflow on the high counter */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1); + } else { + /* mark overflow on low counter */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + } + } +} + +/** + * kvm_pmu_handle_pmcr - handle PMCR register + * @vcpu: The vcpu pointer + * @val: the value guest writes to PMCR register + */ +void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) +{ + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); + int i; + + if (val & ARMV8_PMU_PMCR_E) { + kvm_pmu_enable_counter_mask(vcpu, + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); + } else { + kvm_pmu_disable_counter_mask(vcpu, mask); + } + + if (val & ARMV8_PMU_PMCR_C) + kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); + + if (val & ARMV8_PMU_PMCR_P) { + for_each_set_bit(i, &mask, 32) + kvm_pmu_set_counter_value(vcpu, i, 0); + } +} + +static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) +{ + return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && + (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); +} + +/** + * kvm_pmu_create_perf_event - create a perf event for a counter + * @vcpu: The vcpu pointer + * @select_idx: The number of selected counter + */ +static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc; + struct perf_event *event; + struct perf_event_attr attr; + u64 eventsel, counter, reg, data; + + /* + * For chained counters the event type and filtering attributes are + * obtained from the low/even counter. We also use this counter to + * determine if the event is enabled/disabled. + */ + pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]); + + reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) + ? 
PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx; + data = __vcpu_sys_reg(vcpu, reg); + + kvm_pmu_stop_counter(vcpu, pmc); + eventsel = data & ARMV8_PMU_EVTYPE_EVENT; + + /* Software increment event does't need to be backed by a perf event */ + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR && + pmc->idx != ARMV8_PMU_CYCLE_IDX) + return; + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.type = PERF_TYPE_RAW; + attr.size = sizeof(attr); + attr.pinned = 1; + attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx); + attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0; + attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; + attr.exclude_hv = 1; /* Don't count EL2 events */ + attr.exclude_host = 1; /* Don't count host events */ + attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ? + ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel; + + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + + if (kvm_pmu_pmc_is_chained(pmc)) { + /** + * The initial sample period (overflow count) of an event. For + * chained counters we only support overflow interrupts on the + * high counter. + */ + attr.sample_period = (-counter) & GENMASK(63, 0); + attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; + + event = perf_event_create_kernel_counter(&attr, -1, current, + kvm_pmu_perf_overflow, + pmc + 1); + } else { + /* The initial sample period (overflow count) of an event. */ + if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) + attr.sample_period = (-counter) & GENMASK(63, 0); + else + attr.sample_period = (-counter) & GENMASK(31, 0); + + event = perf_event_create_kernel_counter(&attr, -1, current, + kvm_pmu_perf_overflow, pmc); + } + + if (IS_ERR(event)) { + pr_err_once("kvm: pmu event creation failed %ld\n", + PTR_ERR(event)); + return; + } + + pmc->perf_event = event; +} + +/** + * kvm_pmu_update_pmc_chained - update chained bitmap + * @vcpu: The vcpu pointer + * @select_idx: The number of selected counter + * + * Update the chained bitmap based on the event type written in the + * typer register and the enable state of the odd register. + */ +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc; + bool new_state, old_state; + + old_state = kvm_pmu_pmc_is_chained(pmc); + new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) && + kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1); + + if (old_state == new_state) + return; + + canonical_pmc = kvm_pmu_get_canonical_pmc(pmc); + kvm_pmu_stop_counter(vcpu, canonical_pmc); + if (new_state) { + /* + * During promotion from !chained to chained we must ensure + * the adjacent counter is stopped and its event destroyed + */ + kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc)); + set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); + return; + } + clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); +} + +/** + * kvm_pmu_set_counter_event_type - set selected counter to monitor some event + * @vcpu: The vcpu pointer + * @data: The data guest writes to PMXEVTYPER_EL0 + * @select_idx: The number of selected counter + * + * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an + * event with given hardware event number. Here we call perf_event API to + * emulate this action and create a kernel perf event for it. + */ +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, + u64 select_idx) +{ + u64 reg, event_type = data & ARMV8_PMU_EVTYPE_MASK; + + reg = (select_idx == ARMV8_PMU_CYCLE_IDX) + ? 
PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx; + + __vcpu_sys_reg(vcpu, reg) = event_type; + + kvm_pmu_update_pmc_chained(vcpu, select_idx); + kvm_pmu_create_perf_event(vcpu, select_idx); +} + +bool kvm_arm_support_pmu_v3(void) +{ + /* + * Check if HW_PERF_EVENTS are supported by checking the number of + * hardware performance counters. This could ensure the presence of + * a physical PMU and CONFIG_PERF_EVENT is selected. + */ + return (perf_num_counters() > 0); +} + +int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) +{ + if (!vcpu->arch.pmu.created) + return 0; + + /* + * A valid interrupt configuration for the PMU is either to have a + * properly configured interrupt number and using an in-kernel + * irqchip, or to not have an in-kernel GIC and not set an IRQ. + */ + if (irqchip_in_kernel(vcpu->kvm)) { + int irq = vcpu->arch.pmu.irq_num; + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -EINVAL; + + /* + * If we are using an in-kernel vgic, at this point we know + * the vgic will be initialized, so we can check the PMU irq + * number against the dimensions of the vgic and make sure + * it's valid. + */ + if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq)) + return -EINVAL; + } else if (kvm_arm_pmu_irq_initialized(vcpu)) { + return -EINVAL; + } + + kvm_pmu_vcpu_reset(vcpu); + vcpu->arch.pmu.ready = true; + + return 0; +} + +static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) +{ + if (!kvm_arm_support_pmu_v3()) + return -ENODEV; + + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + return -ENXIO; + + if (vcpu->arch.pmu.created) + return -EBUSY; + + if (irqchip_in_kernel(vcpu->kvm)) { + int ret; + + /* + * If using the PMU with an in-kernel virtual GIC + * implementation, we require the GIC to be already + * initialized when initializing the PMU. + */ + if (!vgic_initialized(vcpu->kvm)) + return -ENODEV; + + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -ENXIO; + + ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num, + &vcpu->arch.pmu); + if (ret) + return ret; + } + + vcpu->arch.pmu.created = true; + return 0; +} + +/* + * For one VM the interrupt type must be same for each vcpu. + * As a PPI, the interrupt number is the same for all vcpus, + * while as an SPI it must be a separate number per vcpu. + */ +static bool pmu_irq_is_valid(struct kvm *kvm, int irq) +{ + int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_arm_pmu_irq_initialized(vcpu)) + continue; + + if (irq_is_ppi(irq)) { + if (vcpu->arch.pmu.irq_num != irq) + return false; + } else { + if (vcpu->arch.pmu.irq_num == irq) + return false; + } + } + + return true; +} + +int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_PMU_V3_IRQ: { + int __user *uaddr = (int __user *)(long)attr->addr; + int irq; + + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + return -ENODEV; + + if (get_user(irq, uaddr)) + return -EFAULT; + + /* The PMU overflow interrupt can be a PPI or a valid SPI. 
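 + [Editor's note, illustrative userspace sketch, not part of this commit; vcpu_fd and the PPI number 23 are placeholders:]
 +
 +	int irq = 23;					/* a PPI */
 +	struct kvm_device_attr attr = {
 +		.group	= KVM_ARM_VCPU_PMU_V3_CTRL,
 +		.attr	= KVM_ARM_VCPU_PMU_V3_IRQ,
 +		.addr	= (__u64)(unsigned long)&irq,
 +	};
 +
 +	ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);	/* handled by the code below */
 +
 +	attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
 +	attr.addr = 0;
 +	ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);	/* ends up in kvm_arm_pmu_v3_init() */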
*/ + if (!(irq_is_ppi(irq) || irq_is_spi(irq))) + return -EINVAL; + + if (!pmu_irq_is_valid(vcpu->kvm, irq)) + return -EINVAL; + + if (kvm_arm_pmu_irq_initialized(vcpu)) + return -EBUSY; + + kvm_debug("Set kvm ARM PMU irq: %d\n", irq); + vcpu->arch.pmu.irq_num = irq; + return 0; + } + case KVM_ARM_VCPU_PMU_V3_INIT: + return kvm_arm_pmu_v3_init(vcpu); + } + + return -ENXIO; +} + +int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_PMU_V3_IRQ: { + int __user *uaddr = (int __user *)(long)attr->addr; + int irq; + + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + return -ENODEV; + + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -ENXIO; + + irq = vcpu->arch.pmu.irq_num; + return put_user(irq, uaddr); + } + } + + return -ENXIO; +} + +int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_PMU_V3_IRQ: + case KVM_ARM_VCPU_PMU_V3_INIT: + if (kvm_arm_support_pmu_v3() && + test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + return 0; + } + + return -ENXIO; +} diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c new file mode 100644 index 000000000000..83415e96b589 --- /dev/null +++ b/arch/arm64/kvm/psci.c @@ -0,0 +1,564 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/arm-smccc.h> +#include <linux/preempt.h> +#include <linux/kvm_host.h> +#include <linux/uaccess.h> +#include <linux/wait.h> + +#include <asm/cputype.h> +#include <asm/kvm_emulate.h> + +#include <kvm/arm_psci.h> +#include <kvm/arm_hypercalls.h> + +/* + * This is an implementation of the Power State Coordination Interface + * as described in ARM document number ARM DEN 0022A. + */ + +#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) + +static unsigned long psci_affinity_mask(unsigned long affinity_level) +{ + if (affinity_level <= 3) + return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); + + return 0; +} + +static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + /* + * NOTE: For simplicity, we make VCPU suspend emulation to be + * same-as WFI (Wait-for-interrupt) emulation. + * + * This means for KVM the wakeup events are interrupts and + * this is consistent with intended use of StateID as described + * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A). + * + * Further, we also treat power-down request to be same as + * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2 + * specification (ARM DEN 0022A). This means all suspend states + * for KVM will preserve the register state. + */ + kvm_vcpu_block(vcpu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); + + return PSCI_RET_SUCCESS; +} + +static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_SLEEP, vcpu); + kvm_vcpu_kick(vcpu); +} + +static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) +{ + struct vcpu_reset_state *reset_state; + struct kvm *kvm = source_vcpu->kvm; + struct kvm_vcpu *vcpu = NULL; + unsigned long cpu_id; + + cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK; + if (vcpu_mode_is_32bit(source_vcpu)) + cpu_id &= ~((u32) 0); + + vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id); + + /* + * Make sure the caller requested a valid CPU and that the CPU is + * turned off. 
+ */ + if (!vcpu) + return PSCI_RET_INVALID_PARAMS; + if (!vcpu->arch.power_off) { + if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1) + return PSCI_RET_ALREADY_ON; + else + return PSCI_RET_INVALID_PARAMS; + } + + reset_state = &vcpu->arch.reset_state; + + reset_state->pc = smccc_get_arg2(source_vcpu); + + /* Propagate caller endianness */ + reset_state->be = kvm_vcpu_is_be(source_vcpu); + + /* + * NOTE: We always update r0 (or x0) because for PSCI v0.1 + * the general purpose registers are undefined upon CPU_ON. + */ + reset_state->r0 = smccc_get_arg3(source_vcpu); + + WRITE_ONCE(reset_state->reset, true); + kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); + + /* + * Make sure the reset request is observed if the change to + * power_state is observed. + */ + smp_wmb(); + + vcpu->arch.power_off = false; + kvm_vcpu_wake_up(vcpu); + + return PSCI_RET_SUCCESS; +} + +static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) +{ + int i, matching_cpus = 0; + unsigned long mpidr; + unsigned long target_affinity; + unsigned long target_affinity_mask; + unsigned long lowest_affinity_level; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + + target_affinity = smccc_get_arg1(vcpu); + lowest_affinity_level = smccc_get_arg2(vcpu); + + /* Determine target affinity mask */ + target_affinity_mask = psci_affinity_mask(lowest_affinity_level); + if (!target_affinity_mask) + return PSCI_RET_INVALID_PARAMS; + + /* Ignore other bits of target affinity */ + target_affinity &= target_affinity_mask; + + /* + * If one or more VCPU matching target affinity are running + * then ON else OFF + */ + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr_aff(tmp); + if ((mpidr & target_affinity_mask) == target_affinity) { + matching_cpus++; + if (!tmp->arch.power_off) + return PSCI_0_2_AFFINITY_LEVEL_ON; + } + } + + if (!matching_cpus) + return PSCI_RET_INVALID_PARAMS; + + return PSCI_0_2_AFFINITY_LEVEL_OFF; +} + +static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) +{ + int i; + struct kvm_vcpu *tmp; + + /* + * The KVM ABI specifies that a system event exit may call KVM_RUN + * again and may perform shutdown/reboot at a later time than when the + * actual request is made. Since we are implementing PSCI and a + * caller of PSCI reboot and shutdown expects that the system shuts + * down or reboots immediately, let's make sure that VCPUs are not run + * after this call is handled and before the VCPUs have been + * re-initialized. + */ + kvm_for_each_vcpu(i, tmp, vcpu->kvm) + tmp->arch.power_off = true; + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); + + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); + vcpu->run->system_event.type = type; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; +} + +static void kvm_psci_system_off(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); +} + +static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); +} + +static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) +{ + int i; + + /* + * Zero the input registers' upper 32 bits. They will be fully + * zeroed on exit, so we're fine changing them in place. 
+ */ + for (i = 1; i < 4; i++) + vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i))); +} + +static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn) +{ + switch(fn) { + case PSCI_0_2_FN64_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_ON: + case PSCI_0_2_FN64_AFFINITY_INFO: + /* Disallow these functions for 32bit guests */ + if (vcpu_mode_is_32bit(vcpu)) + return PSCI_RET_NOT_SUPPORTED; + break; + } + + return 0; +} + +static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + u32 psci_fn = smccc_get_function(vcpu); + unsigned long val; + int ret = 1; + + val = kvm_psci_check_allowed_function(vcpu, psci_fn); + if (val) + goto out; + + switch (psci_fn) { + case PSCI_0_2_FN_PSCI_VERSION: + /* + * Bits[31:16] = Major Version = 0 + * Bits[15:0] = Minor Version = 2 + */ + val = KVM_ARM_PSCI_0_2; + break; + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + val = kvm_psci_vcpu_suspend(vcpu); + break; + case PSCI_0_2_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = PSCI_RET_SUCCESS; + break; + case PSCI_0_2_FN_CPU_ON: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; + case PSCI_0_2_FN64_CPU_ON: + mutex_lock(&kvm->lock); + val = kvm_psci_vcpu_on(vcpu); + mutex_unlock(&kvm->lock); + break; + case PSCI_0_2_FN_AFFINITY_INFO: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; + case PSCI_0_2_FN64_AFFINITY_INFO: + val = kvm_psci_vcpu_affinity_info(vcpu); + break; + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + /* + * Trusted OS is MP hence does not require migration + * or + * Trusted OS is not present + */ + val = PSCI_0_2_TOS_MP; + break; + case PSCI_0_2_FN_SYSTEM_OFF: + kvm_psci_system_off(vcpu); + /* + * We shouldn't be going back to guest VCPU after + * receiving SYSTEM_OFF request. + * + * If user space accidentally/deliberately resumes + * guest VCPU after SYSTEM_OFF request then guest + * VCPU should see internal failure from PSCI return + * value. To achieve this, we preload r0 (or x0) with + * PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; + case PSCI_0_2_FN_SYSTEM_RESET: + kvm_psci_system_reset(vcpu); + /* + * Same reason as SYSTEM_OFF for preloading r0 (or x0) + * with PSCI return value INTERNAL_FAILURE. 
+ */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; + default: + val = PSCI_RET_NOT_SUPPORTED; + break; + } + +out: + smccc_set_retval(vcpu, val, 0, 0, 0); + return ret; +} + +static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) +{ + u32 psci_fn = smccc_get_function(vcpu); + u32 feature; + unsigned long val; + int ret = 1; + + switch(psci_fn) { + case PSCI_0_2_FN_PSCI_VERSION: + val = KVM_ARM_PSCI_1_0; + break; + case PSCI_1_0_FN_PSCI_FEATURES: + feature = smccc_get_arg1(vcpu); + val = kvm_psci_check_allowed_function(vcpu, feature); + if (val) + break; + + switch(feature) { + case PSCI_0_2_FN_PSCI_VERSION: + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + case PSCI_0_2_FN_CPU_OFF: + case PSCI_0_2_FN_CPU_ON: + case PSCI_0_2_FN64_CPU_ON: + case PSCI_0_2_FN_AFFINITY_INFO: + case PSCI_0_2_FN64_AFFINITY_INFO: + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + case PSCI_0_2_FN_SYSTEM_OFF: + case PSCI_0_2_FN_SYSTEM_RESET: + case PSCI_1_0_FN_PSCI_FEATURES: + case ARM_SMCCC_VERSION_FUNC_ID: + val = 0; + break; + default: + val = PSCI_RET_NOT_SUPPORTED; + break; + } + break; + default: + return kvm_psci_0_2_call(vcpu); + } + + smccc_set_retval(vcpu, val, 0, 0, 0); + return ret; +} + +static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + u32 psci_fn = smccc_get_function(vcpu); + unsigned long val; + + switch (psci_fn) { + case KVM_PSCI_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = PSCI_RET_SUCCESS; + break; + case KVM_PSCI_FN_CPU_ON: + mutex_lock(&kvm->lock); + val = kvm_psci_vcpu_on(vcpu); + mutex_unlock(&kvm->lock); + break; + default: + val = PSCI_RET_NOT_SUPPORTED; + break; + } + + smccc_set_retval(vcpu, val, 0, 0, 0); + return 1; +} + +/** + * kvm_psci_call - handle PSCI call if r0 value is in range + * @vcpu: Pointer to the VCPU struct + * + * Handle PSCI calls from guests through traps from HVC instructions. + * The calling convention is similar to SMC calls to the secure world + * where the function number is placed in r0. + * + * This function returns: > 0 (success), 0 (success but exit to user + * space), and < 0 (errors) + * + * Errors: + * -EINVAL: Unrecognized PSCI function + */ +int kvm_psci_call(struct kvm_vcpu *vcpu) +{ + switch (kvm_psci_version(vcpu, vcpu->kvm)) { + case KVM_ARM_PSCI_1_0: + return kvm_psci_1_0_call(vcpu); + case KVM_ARM_PSCI_0_2: + return kvm_psci_0_2_call(vcpu); + case KVM_ARM_PSCI_0_1: + return kvm_psci_0_1_call(vcpu); + default: + return -EINVAL; + }; +} + +int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) +{ + return 3; /* PSCI version and two workaround registers */ +} + +int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++)) + return -EFAULT; + + if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++)) + return -EFAULT; + + if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++)) + return -EFAULT; + + return 0; +} + +#define KVM_REG_FEATURE_LEVEL_WIDTH 4 +#define KVM_REG_FEATURE_LEVEL_MASK (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1) + +/* + * Convert the workaround level into an easy-to-compare number, where higher + * values mean better protection. 
+ */ +static int get_kernel_wa_level(u64 regid) +{ + switch (regid) { + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + switch (kvm_arm_harden_branch_predictor()) { + case KVM_BP_HARDEN_UNKNOWN: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; + case KVM_BP_HARDEN_WA_NEEDED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL; + case KVM_BP_HARDEN_NOT_REQUIRED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED; + } + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + switch (kvm_arm_have_ssbd()) { + case KVM_SSBD_FORCE_DISABLE: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; + case KVM_SSBD_KERNEL: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL; + case KVM_SSBD_FORCE_ENABLE: + case KVM_SSBD_MITIGATED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; + case KVM_SSBD_UNKNOWN: + default: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN; + } + } + + return -EINVAL; +} + +int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + switch (reg->id) { + case KVM_REG_ARM_PSCI_VERSION: + val = kvm_psci_version(vcpu, vcpu->kvm); + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; + + if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL && + kvm_arm_get_vcpu_workaround_2_flag(vcpu)) + val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED; + break; + default: + return -ENOENT; + } + + if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int wa_level; + + if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + switch (reg->id) { + case KVM_REG_ARM_PSCI_VERSION: + { + bool wants_02; + + wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features); + + switch (val) { + case KVM_ARM_PSCI_0_1: + if (wants_02) + return -EINVAL; + vcpu->kvm->arch.psci_version = val; + return 0; + case KVM_ARM_PSCI_0_2: + case KVM_ARM_PSCI_1_0: + if (!wants_02) + return -EINVAL; + vcpu->kvm->arch.psci_version = val; + return 0; + } + break; + } + + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + if (val & ~KVM_REG_FEATURE_LEVEL_MASK) + return -EINVAL; + + if (get_kernel_wa_level(reg->id) < val) + return -EINVAL; + + return 0; + + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + if (val & ~(KVM_REG_FEATURE_LEVEL_MASK | + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED)) + return -EINVAL; + + wa_level = val & KVM_REG_FEATURE_LEVEL_MASK; + + if (get_kernel_wa_level(reg->id) < wa_level) + return -EINVAL; + + /* The enabled bit must not be set unless the level is AVAIL. */ + if (wa_level != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL && + wa_level != val) + return -EINVAL; + + /* Are we finished or do we need to check the enable bit ? */ + if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL) + return 0; + + /* + * If this kernel supports the workaround to be switched on + * or off, make sure it matches the requested setting. 
+ */ + switch (wa_level) { + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: + kvm_arm_set_vcpu_workaround_2_flag(vcpu, + val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED); + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: + kvm_arm_set_vcpu_workaround_2_flag(vcpu, true); + break; + } + + return 0; + default: + return -ENOENT; + } + + return -EINVAL; +} diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c new file mode 100644 index 000000000000..1e0f4c284888 --- /dev/null +++ b/arch/arm64/kvm/pvtime.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Arm Ltd. + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> +#include <asm/pvclock-abi.h> + +#include <kvm/arm_hypercalls.h> + +void kvm_update_stolen_time(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + u64 steal; + __le64 steal_le; + u64 offset; + int idx; + u64 base = vcpu->arch.steal.base; + + if (base == GPA_INVALID) + return; + + /* Let's do the local bookkeeping */ + steal = vcpu->arch.steal.steal; + steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal; + vcpu->arch.steal.last_steal = current->sched_info.run_delay; + vcpu->arch.steal.steal = steal; + + steal_le = cpu_to_le64(steal); + idx = srcu_read_lock(&kvm->srcu); + offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); + kvm_put_guest(kvm, base + offset, steal_le, u64); + srcu_read_unlock(&kvm->srcu, idx); +} + +long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) +{ + u32 feature = smccc_get_arg1(vcpu); + long val = SMCCC_RET_NOT_SUPPORTED; + + switch (feature) { + case ARM_SMCCC_HV_PV_TIME_FEATURES: + case ARM_SMCCC_HV_PV_TIME_ST: + val = SMCCC_RET_SUCCESS; + break; + } + + return val; +} + +gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) +{ + struct pvclock_vcpu_stolen_time init_values = {}; + struct kvm *kvm = vcpu->kvm; + u64 base = vcpu->arch.steal.base; + int idx; + + if (base == GPA_INVALID) + return base; + + /* + * Start counting stolen time from the time the guest requests + * the feature enabled. 
+ */ + vcpu->arch.steal.steal = 0; + vcpu->arch.steal.last_steal = current->sched_info.run_delay; + + idx = srcu_read_lock(&kvm->srcu); + kvm_write_guest(kvm, base, &init_values, sizeof(init_values)); + srcu_read_unlock(&kvm->srcu, idx); + + return base; +} + +int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 __user *user = (u64 __user *)attr->addr; + struct kvm *kvm = vcpu->kvm; + u64 ipa; + int ret = 0; + int idx; + + if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) + return -ENXIO; + + if (get_user(ipa, user)) + return -EFAULT; + if (!IS_ALIGNED(ipa, 64)) + return -EINVAL; + if (vcpu->arch.steal.base != GPA_INVALID) + return -EEXIST; + + /* Check the address is in a valid memslot */ + idx = srcu_read_lock(&kvm->srcu); + if (kvm_is_error_hva(gfn_to_hva(kvm, ipa >> PAGE_SHIFT))) + ret = -EINVAL; + srcu_read_unlock(&kvm->srcu, idx); + + if (!ret) + vcpu->arch.steal.base = ipa; + + return ret; +} + +int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 __user *user = (u64 __user *)attr->addr; + u64 ipa; + + if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) + return -ENXIO; + + ipa = vcpu->arch.steal.base; + + if (put_user(ipa, user)) + return -EFAULT; + return 0; +} + +int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_PVTIME_IPA: + return 0; + } + return -ENXIO; +} diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 70cd7bcca433..d3b209023727 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -36,15 +36,11 @@ static u32 kvm_ipa_limit; /* * ARMv8 Reset Values */ -static const struct kvm_regs default_regs_reset = { - .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | - PSR_F_BIT | PSR_D_BIT), -}; +#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \ + PSR_F_BIT | PSR_D_BIT) -static const struct kvm_regs default_regs_reset32 = { - .regs.pstate = (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | - PSR_AA32_I_BIT | PSR_AA32_F_BIT), -}; +#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ + PSR_AA32_I_BIT | PSR_AA32_F_BIT) /** * kvm_arch_vm_ioctl_check_extension @@ -155,7 +151,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) vl = vcpu->arch.sve_max_vl; /* - * Resposibility for these properties is shared between + * Responsibility for these properties is shared between * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and * set_sve_vls(). Double-check here just to be sure: */ @@ -241,7 +237,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) * ioctl or as part of handling a request issued by another VCPU in the PSCI * handling code. In the first case, the VCPU will not be loaded, and in the * second case the VCPU will be loaded. Because this function operates purely - * on the memory-backed valus of system registers, we want to do a full put if + * on the memory-backed values of system registers, we want to do a full put if * we were loaded (handling a request) and load the values back at the end of * the function. Otherwise we leave the state alone. 
In both cases, we * disable preemption around the vcpu reset as we would otherwise race with @@ -249,9 +245,9 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - const struct kvm_regs *cpu_reset; int ret = -EINVAL; bool loaded; + u32 pstate; /* Reset PMU outside of the non-preemptible section */ kvm_pmu_vcpu_reset(vcpu); @@ -282,16 +278,17 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) goto out; - cpu_reset = &default_regs_reset32; + pstate = VCPU_RESET_PSTATE_SVC; } else { - cpu_reset = &default_regs_reset; + pstate = VCPU_RESET_PSTATE_EL1; } break; } /* Reset core registers */ - memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset)); + memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu))); + vcpu_gp_regs(vcpu)->regs.pstate = pstate; /* Reset system registers */ kvm_reset_sys_regs(vcpu); @@ -388,7 +385,7 @@ int kvm_set_ipa_limit(void) * * So clamp the ipa limit further down to limit the number of levels. * Since we can concatenate upto 16 tables at entry level, we could - * go upto 4bits above the maximum VA addressible with the current + * go upto 4bits above the maximum VA addressable with the current * number of levels. */ va_max = PGDIR_SHIFT + PAGE_SHIFT - 3; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7d7a39b01135..80985439bfb2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -34,7 +34,7 @@ #include "trace.h" /* - * All of this file is extremly similar to the ARM coproc.c, but the + * All of this file is extremely similar to the ARM coproc.c, but the * types are different. My gut feeling is that it should be pretty * easy to merge, but that would be an ABI breakage -- again. VFP * would also need to be abstracted. @@ -64,11 +64,8 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, return false; } -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) { - if (!vcpu->arch.sysregs_loaded_on_cpu) - goto immediate_read; - /* * System registers listed in the switch are not saved on every * exit from the guest but are only saved on vcpu_put. @@ -79,75 +76,92 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) * thread when emulating cross-VCPU communication. 
*/ switch (reg) { - case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1); - case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12); - case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1); - case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12); - case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12); - case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12); - case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12); - case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12); - case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12); - case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12); - case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12); - case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12); - case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12); - case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12); - case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0); - case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0); - case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1); - case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12); - case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12); - case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1); - case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2); - case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2); - case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2); + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; + case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break; + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + default: return false; } -immediate_read: - return __vcpu_sys_reg(vcpu, reg); + return true; } -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) { - if (!vcpu->arch.sysregs_loaded_on_cpu) - goto immediate_write; - /* * System registers listed in the switch are not restored on every * entry to the guest but are only restored on vcpu_load. * * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the the MPIDR should only be - * set once, before running the VCPU, and never changed later. + * should never be listed below, because the MPIDR should only be set + * once, before running the VCPU, and never changed later. 
*/ switch (reg) { - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return; - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return; - case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return; - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return; - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return; - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return; - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return; - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return; - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return; - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return; - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return; - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return; - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return; - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return; - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return; - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return; - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return; - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return; - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return; - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return; - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return; - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return; - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return; + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; + case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + default: return false; } -immediate_write: + return true; +} + +u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +{ + u64 val = 0x8badf00d8badf00d; + + if (vcpu->arch.sysregs_loaded_on_cpu && + __vcpu_read_sys_reg_from_cpu(reg, &val)) + return val; + + return __vcpu_sys_reg(vcpu, reg); +} + +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +{ + if (vcpu->arch.sysregs_loaded_on_cpu && + __vcpu_write_sys_reg_to_cpu(val, reg)) + return; + __vcpu_sys_reg(vcpu, reg) = val; } @@ -1532,7 +1546,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, { 
SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, - { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, NULL, PMINTENSET_EL1 }, + { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, @@ -1571,8 +1585,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 }, { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, - { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 }, - { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 }, + { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, + { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 }, { SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 }, { SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 }, { SYS_DESC(SYS_PMCEID0_EL0), access_pmceid }, @@ -2073,12 +2087,37 @@ static const struct sys_reg_desc cp15_64_regs[] = { { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, }; +static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, + bool is_32) +{ + unsigned int i; + + for (i = 0; i < n; i++) { + if (!is_32 && table[i].reg && !table[i].reset) { + kvm_err("sys_reg table %p entry %d lacks reset\n", + table, i); + return 1; + } + + if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) { + kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1); + return 1; + } + } + + return 0; +} + /* Target specific emulation tables */ static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS]; void kvm_register_target_sys_reg_table(unsigned int target, struct kvm_sys_reg_target_table *table) { + if (check_sysreg_table(table->table64.table, table->table64.num, false) || + check_sysreg_table(table->table32.table, table->table32.num, true)) + return; + target_tables[target] = table; } @@ -2364,19 +2403,13 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, } static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *table, size_t num, - unsigned long *bmap) + const struct sys_reg_desc *table, size_t num) { unsigned long i; for (i = 0; i < num; i++) - if (table[i].reset) { - int reg = table[i].reg; - + if (table[i].reset) table[i].reset(vcpu, &table[i]); - if (reg > 0 && reg < NR_SYS_REGS) - set_bit(reg, bmap); - } } /** @@ -2832,32 +2865,18 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return write_demux_regids(uindices); } -static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n) -{ - unsigned int i; - - for (i = 1; i < n; i++) { - if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) { - kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1); - return 1; - } - } - - return 0; -} - void kvm_sys_reg_table_init(void) { unsigned int i; struct sys_reg_desc clidr; /* Make sure tables are unique and in order. 
*/ - BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs))); - BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs))); - BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs))); - BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs))); - BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs))); - BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs))); + BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false)); + BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true)); + BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true)); + BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true)); + BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true)); + BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false)); /* We abuse the reset function to overwrite the table itself. */ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) @@ -2893,17 +2912,10 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) { size_t num; const struct sys_reg_desc *table; - DECLARE_BITMAP(bmap, NR_SYS_REGS) = { 0, }; /* Generic chip reset first (so target could override). */ - reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs), bmap); + reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); table = get_target_table(vcpu->arch.target, true, &num); - reset_sys_reg_descs(vcpu, table, num, bmap); - - for (num = 1; num < NR_SYS_REGS; num++) { - if (WARN(!test_bit(num, bmap), - "Didn't reset __vcpu_sys_reg(%zi)\n", num)) - break; - } + reset_sys_reg_descs(vcpu, table, num); } diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h index eab91ad0effb..86f9ea47be29 100644 --- a/arch/arm64/kvm/trace.h +++ b/arch/arm64/kvm/trace.h @@ -1,216 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#ifndef _TRACE_ARM64_KVM_H #define _TRACE_ARM64_KVM_H -#include <linux/tracepoint.h> -#include "sys_regs.h" +#include "trace_arm.h" +#include "trace_handle_exit.h" -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kvm - -TRACE_EVENT(kvm_wfx_arm64, - TP_PROTO(unsigned long vcpu_pc, bool is_wfe), - TP_ARGS(vcpu_pc, is_wfe), - - TP_STRUCT__entry( - __field(unsigned long, vcpu_pc) - __field(bool, is_wfe) - ), - - TP_fast_assign( - __entry->vcpu_pc = vcpu_pc; - __entry->is_wfe = is_wfe; - ), - - TP_printk("guest executed wf%c at: 0x%08lx", - __entry->is_wfe ? 
'e' : 'i', __entry->vcpu_pc) -); - -TRACE_EVENT(kvm_hvc_arm64, - TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), - TP_ARGS(vcpu_pc, r0, imm), - - TP_STRUCT__entry( - __field(unsigned long, vcpu_pc) - __field(unsigned long, r0) - __field(unsigned long, imm) - ), - - TP_fast_assign( - __entry->vcpu_pc = vcpu_pc; - __entry->r0 = r0; - __entry->imm = imm; - ), - - TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)", - __entry->vcpu_pc, __entry->r0, __entry->imm) -); - -TRACE_EVENT(kvm_arm_setup_debug, - TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), - TP_ARGS(vcpu, guest_debug), - - TP_STRUCT__entry( - __field(struct kvm_vcpu *, vcpu) - __field(__u32, guest_debug) - ), - - TP_fast_assign( - __entry->vcpu = vcpu; - __entry->guest_debug = guest_debug; - ), - - TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) -); - -TRACE_EVENT(kvm_arm_clear_debug, - TP_PROTO(__u32 guest_debug), - TP_ARGS(guest_debug), - - TP_STRUCT__entry( - __field(__u32, guest_debug) - ), - - TP_fast_assign( - __entry->guest_debug = guest_debug; - ), - - TP_printk("flags: 0x%08x", __entry->guest_debug) -); - -TRACE_EVENT(kvm_arm_set_dreg32, - TP_PROTO(const char *name, __u32 value), - TP_ARGS(name, value), - - TP_STRUCT__entry( - __field(const char *, name) - __field(__u32, value) - ), - - TP_fast_assign( - __entry->name = name; - __entry->value = value; - ), - - TP_printk("%s: 0x%08x", __entry->name, __entry->value) -); - -TRACE_DEFINE_SIZEOF(__u64); - -TRACE_EVENT(kvm_arm_set_regset, - TP_PROTO(const char *type, int len, __u64 *control, __u64 *value), - TP_ARGS(type, len, control, value), - TP_STRUCT__entry( - __field(const char *, name) - __field(int, len) - __array(u64, ctrls, 16) - __array(u64, values, 16) - ), - TP_fast_assign( - __entry->name = type; - __entry->len = len; - memcpy(__entry->ctrls, control, len << 3); - memcpy(__entry->values, value, len << 3); - ), - TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name, - __print_array(__entry->ctrls, __entry->len, sizeof(__u64)), - __print_array(__entry->values, __entry->len, sizeof(__u64))) -); - -TRACE_EVENT(trap_reg, - TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value), - TP_ARGS(fn, reg, is_write, write_value), - - TP_STRUCT__entry( - __field(const char *, fn) - __field(int, reg) - __field(bool, is_write) - __field(u64, write_value) - ), - - TP_fast_assign( - __entry->fn = fn; - __entry->reg = reg; - __entry->is_write = is_write; - __entry->write_value = write_value; - ), - - TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) -); - -TRACE_EVENT(kvm_handle_sys_reg, - TP_PROTO(unsigned long hsr), - TP_ARGS(hsr), - - TP_STRUCT__entry( - __field(unsigned long, hsr) - ), - - TP_fast_assign( - __entry->hsr = hsr; - ), - - TP_printk("HSR 0x%08lx", __entry->hsr) -); - -TRACE_EVENT(kvm_sys_access, - TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg), - TP_ARGS(vcpu_pc, params, reg), - - TP_STRUCT__entry( - __field(unsigned long, vcpu_pc) - __field(bool, is_write) - __field(const char *, name) - __field(u8, Op0) - __field(u8, Op1) - __field(u8, CRn) - __field(u8, CRm) - __field(u8, Op2) - ), - - TP_fast_assign( - __entry->vcpu_pc = vcpu_pc; - __entry->is_write = params->is_write; - __entry->name = reg->name; - __entry->Op0 = reg->Op0; - __entry->Op0 = reg->Op0; - __entry->Op1 = reg->Op1; - __entry->CRn = reg->CRn; - __entry->CRm = reg->CRm; - __entry->Op2 = reg->Op2; - ), - - 
TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s", - __entry->vcpu_pc, __entry->name ?: "UNKN", - __entry->Op0, __entry->Op1, __entry->CRn, - __entry->CRm, __entry->Op2, - __entry->is_write ? "write" : "read") -); - -TRACE_EVENT(kvm_set_guest_debug, - TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), - TP_ARGS(vcpu, guest_debug), - - TP_STRUCT__entry( - __field(struct kvm_vcpu *, vcpu) - __field(__u32, guest_debug) - ), - - TP_fast_assign( - __entry->vcpu = vcpu; - __entry->guest_debug = guest_debug; - ), - - TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) -); - - -#endif /* _TRACE_ARM64_KVM_H */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE trace - -/* This part must be outside protection */ -#include <trace/define_trace.h> +#endif /* _TRACE_ARM64_KVM_H */ diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h new file mode 100644 index 000000000000..4c71270cc097 --- /dev/null +++ b/arch/arm64/kvm/trace_arm.h @@ -0,0 +1,378 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(_TRACE_ARM_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ARM_ARM64_KVM_H + +#include <kvm/arm_arch_timer.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +/* + * Tracepoints for entry/exit to guest + */ +TRACE_EVENT(kvm_entry, + TP_PROTO(unsigned long vcpu_pc), + TP_ARGS(vcpu_pc), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("PC: 0x%08lx", __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_exit, + TP_PROTO(int ret, unsigned int esr_ec, unsigned long vcpu_pc), + TP_ARGS(ret, esr_ec, vcpu_pc), + + TP_STRUCT__entry( + __field( int, ret ) + __field( unsigned int, esr_ec ) + __field( unsigned long, vcpu_pc ) + ), + + TP_fast_assign( + __entry->ret = ARM_EXCEPTION_CODE(ret); + __entry->esr_ec = ARM_EXCEPTION_IS_TRAP(ret) ? 
esr_ec : 0; + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx", + __print_symbolic(__entry->ret, kvm_arm_exception_type), + __entry->esr_ec, + __print_symbolic(__entry->esr_ec, kvm_arm_exception_class), + __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_guest_fault, + TP_PROTO(unsigned long vcpu_pc, unsigned long hsr, + unsigned long hxfar, + unsigned long long ipa), + TP_ARGS(vcpu_pc, hsr, hxfar, ipa), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( unsigned long, hsr ) + __field( unsigned long, hxfar ) + __field( unsigned long long, ipa ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->hsr = hsr; + __entry->hxfar = hxfar; + __entry->ipa = ipa; + ), + + TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx", + __entry->ipa, __entry->hsr, + __entry->hxfar, __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_access_fault, + TP_PROTO(unsigned long ipa), + TP_ARGS(ipa), + + TP_STRUCT__entry( + __field( unsigned long, ipa ) + ), + + TP_fast_assign( + __entry->ipa = ipa; + ), + + TP_printk("IPA: %lx", __entry->ipa) +); + +TRACE_EVENT(kvm_irq_line, + TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level), + TP_ARGS(type, vcpu_idx, irq_num, level), + + TP_STRUCT__entry( + __field( unsigned int, type ) + __field( int, vcpu_idx ) + __field( int, irq_num ) + __field( int, level ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->vcpu_idx = vcpu_idx; + __entry->irq_num = irq_num; + __entry->level = level; + ), + + TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d", + (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" : + (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" : + (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN", + __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level) +); + +TRACE_EVENT(kvm_mmio_emulate, + TP_PROTO(unsigned long vcpu_pc, unsigned long instr, + unsigned long cpsr), + TP_ARGS(vcpu_pc, instr, cpsr), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( unsigned long, instr ) + __field( unsigned long, cpsr ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->instr = instr; + __entry->cpsr = cpsr; + ), + + TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)", + __entry->vcpu_pc, __entry->instr, __entry->cpsr) +); + +TRACE_EVENT(kvm_unmap_hva_range, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier unmap range: %#08lx -- %#08lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_set_spte_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva) +); + +TRACE_EVENT(kvm_age_hva, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier age hva: %#08lx -- %#08lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_test_age_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier test age hva: 
%#08lx", __entry->hva) +); + +TRACE_EVENT(kvm_set_way_flush, + TP_PROTO(unsigned long vcpu_pc, bool cache), + TP_ARGS(vcpu_pc, cache), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( bool, cache ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->cache = cache; + ), + + TP_printk("S/W flush at 0x%016lx (cache %s)", + __entry->vcpu_pc, __entry->cache ? "on" : "off") +); + +TRACE_EVENT(kvm_toggle_cache, + TP_PROTO(unsigned long vcpu_pc, bool was, bool now), + TP_ARGS(vcpu_pc, was, now), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( bool, was ) + __field( bool, now ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->was = was; + __entry->now = now; + ), + + TP_printk("VM op at 0x%016lx (cache was %s, now %s)", + __entry->vcpu_pc, __entry->was ? "on" : "off", + __entry->now ? "on" : "off") +); + +/* + * Tracepoints for arch_timer + */ +TRACE_EVENT(kvm_timer_update_irq, + TP_PROTO(unsigned long vcpu_id, __u32 irq, int level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( __u32, irq ) + __field( int, level ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->irq = irq; + __entry->level = level; + ), + + TP_printk("VCPU: %ld, IRQ %d, level %d", + __entry->vcpu_id, __entry->irq, __entry->level) +); + +TRACE_EVENT(kvm_get_timer_map, + TP_PROTO(unsigned long vcpu_id, struct timer_map *map), + TP_ARGS(vcpu_id, map), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( int, direct_vtimer ) + __field( int, direct_ptimer ) + __field( int, emul_ptimer ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer); + __entry->direct_ptimer = + (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1; + __entry->emul_ptimer = + (map->emul_ptimer) ? 
arch_timer_ctx_index(map->emul_ptimer) : -1; + ), + + TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d", + __entry->vcpu_id, + __entry->direct_vtimer, + __entry->direct_ptimer, + __entry->emul_ptimer) +); + +TRACE_EVENT(kvm_timer_save_state, + TP_PROTO(struct arch_timer_context *ctx), + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field( unsigned long, ctl ) + __field( unsigned long long, cval ) + __field( int, timer_idx ) + ), + + TP_fast_assign( + __entry->ctl = ctx->cnt_ctl; + __entry->cval = ctx->cnt_cval; + __entry->timer_idx = arch_timer_ctx_index(ctx); + ), + + TP_printk(" CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d", + __entry->ctl, + __entry->cval, + __entry->timer_idx) +); + +TRACE_EVENT(kvm_timer_restore_state, + TP_PROTO(struct arch_timer_context *ctx), + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field( unsigned long, ctl ) + __field( unsigned long long, cval ) + __field( int, timer_idx ) + ), + + TP_fast_assign( + __entry->ctl = ctx->cnt_ctl; + __entry->cval = ctx->cnt_cval; + __entry->timer_idx = arch_timer_ctx_index(ctx); + ), + + TP_printk("CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d", + __entry->ctl, + __entry->cval, + __entry->timer_idx) +); + +TRACE_EVENT(kvm_timer_hrtimer_expire, + TP_PROTO(struct arch_timer_context *ctx), + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field( int, timer_idx ) + ), + + TP_fast_assign( + __entry->timer_idx = arch_timer_ctx_index(ctx); + ), + + TP_printk("arch_timer_ctx_index: %d", __entry->timer_idx) +); + +TRACE_EVENT(kvm_timer_emulate, + TP_PROTO(struct arch_timer_context *ctx, bool should_fire), + TP_ARGS(ctx, should_fire), + + TP_STRUCT__entry( + __field( int, timer_idx ) + __field( bool, should_fire ) + ), + + TP_fast_assign( + __entry->timer_idx = arch_timer_ctx_index(ctx); + __entry->should_fire = should_fire; + ), + + TP_printk("arch_timer_ctx_index: %d (should_fire: %d)", + __entry->timer_idx, __entry->should_fire) +); + +#endif /* _TRACE_ARM_ARM64_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace_arm + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h new file mode 100644 index 000000000000..2c56d1e0f5bd --- /dev/null +++ b/arch/arm64/kvm/trace_handle_exit.h @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(_TRACE_HANDLE_EXIT_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HANDLE_EXIT_ARM64_KVM_H + +#include <linux/tracepoint.h> +#include "sys_regs.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +TRACE_EVENT(kvm_wfx_arm64, + TP_PROTO(unsigned long vcpu_pc, bool is_wfe), + TP_ARGS(vcpu_pc, is_wfe), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_pc) + __field(bool, is_wfe) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->is_wfe = is_wfe; + ), + + TP_printk("guest executed wf%c at: 0x%08lx", + __entry->is_wfe ? 
'e' : 'i', __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_hvc_arm64, + TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), + TP_ARGS(vcpu_pc, r0, imm), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_pc) + __field(unsigned long, r0) + __field(unsigned long, imm) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->r0 = r0; + __entry->imm = imm; + ), + + TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)", + __entry->vcpu_pc, __entry->r0, __entry->imm) +); + +TRACE_EVENT(kvm_arm_setup_debug, + TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), + TP_ARGS(vcpu, guest_debug), + + TP_STRUCT__entry( + __field(struct kvm_vcpu *, vcpu) + __field(__u32, guest_debug) + ), + + TP_fast_assign( + __entry->vcpu = vcpu; + __entry->guest_debug = guest_debug; + ), + + TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) +); + +TRACE_EVENT(kvm_arm_clear_debug, + TP_PROTO(__u32 guest_debug), + TP_ARGS(guest_debug), + + TP_STRUCT__entry( + __field(__u32, guest_debug) + ), + + TP_fast_assign( + __entry->guest_debug = guest_debug; + ), + + TP_printk("flags: 0x%08x", __entry->guest_debug) +); + +TRACE_EVENT(kvm_arm_set_dreg32, + TP_PROTO(const char *name, __u32 value), + TP_ARGS(name, value), + + TP_STRUCT__entry( + __field(const char *, name) + __field(__u32, value) + ), + + TP_fast_assign( + __entry->name = name; + __entry->value = value; + ), + + TP_printk("%s: 0x%08x", __entry->name, __entry->value) +); + +TRACE_DEFINE_SIZEOF(__u64); + +TRACE_EVENT(kvm_arm_set_regset, + TP_PROTO(const char *type, int len, __u64 *control, __u64 *value), + TP_ARGS(type, len, control, value), + TP_STRUCT__entry( + __field(const char *, name) + __field(int, len) + __array(u64, ctrls, 16) + __array(u64, values, 16) + ), + TP_fast_assign( + __entry->name = type; + __entry->len = len; + memcpy(__entry->ctrls, control, len << 3); + memcpy(__entry->values, value, len << 3); + ), + TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name, + __print_array(__entry->ctrls, __entry->len, sizeof(__u64)), + __print_array(__entry->values, __entry->len, sizeof(__u64))) +); + +TRACE_EVENT(trap_reg, + TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value), + TP_ARGS(fn, reg, is_write, write_value), + + TP_STRUCT__entry( + __field(const char *, fn) + __field(int, reg) + __field(bool, is_write) + __field(u64, write_value) + ), + + TP_fast_assign( + __entry->fn = fn; + __entry->reg = reg; + __entry->is_write = is_write; + __entry->write_value = write_value; + ), + + TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) +); + +TRACE_EVENT(kvm_handle_sys_reg, + TP_PROTO(unsigned long hsr), + TP_ARGS(hsr), + + TP_STRUCT__entry( + __field(unsigned long, hsr) + ), + + TP_fast_assign( + __entry->hsr = hsr; + ), + + TP_printk("HSR 0x%08lx", __entry->hsr) +); + +TRACE_EVENT(kvm_sys_access, + TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg), + TP_ARGS(vcpu_pc, params, reg), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_pc) + __field(bool, is_write) + __field(const char *, name) + __field(u8, Op0) + __field(u8, Op1) + __field(u8, CRn) + __field(u8, CRm) + __field(u8, Op2) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->is_write = params->is_write; + __entry->name = reg->name; + __entry->Op0 = reg->Op0; + __entry->Op0 = reg->Op0; + __entry->Op1 = reg->Op1; + __entry->CRn = reg->CRn; + __entry->CRm = reg->CRm; + __entry->Op2 = reg->Op2; + ), + + 
TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s", + __entry->vcpu_pc, __entry->name ?: "UNKN", + __entry->Op0, __entry->Op1, __entry->CRn, + __entry->CRm, __entry->Op2, + __entry->is_write ? "write" : "read") +); + +TRACE_EVENT(kvm_set_guest_debug, + TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), + TP_ARGS(vcpu, guest_debug), + + TP_STRUCT__entry( + __field(struct kvm_vcpu *, vcpu) + __field(__u32, guest_debug) + ), + + TP_fast_assign( + __entry->vcpu = vcpu; + __entry->guest_debug = guest_debug; + ), + + TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) +); + +#endif /* _TRACE_HANDLE_EXIT_ARM64_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace_handle_exit + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index e7d1ea92095d..2f92bdcb1188 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -7,7 +7,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/kvm_emulate.h> -#include "vgic.h" +#include "vgic/vgic.h" #include "sys_regs.h" static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, diff --git a/arch/arm64/kvm/vgic/trace.h b/arch/arm64/kvm/vgic/trace.h new file mode 100644 index 000000000000..83c64401a7fc --- /dev/null +++ b/arch/arm64/kvm/vgic/trace.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(_TRACE_VGIC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_VGIC_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +TRACE_EVENT(vgic_update_irq_pending, + TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( __u32, irq ) + __field( bool, level ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->irq = irq; + __entry->level = level; + ), + + TP_printk("VCPU: %ld, IRQ %d, level: %d", + __entry->vcpu_id, __entry->irq, __entry->level) +); + +#endif /* _TRACE_VGIC_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../arch/arm64/kvm/vgic +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c new file mode 100644 index 000000000000..b13a9e3f99dd --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016 Linaro + * Author: Christoffer Dall <christoffer.dall@linaro.org> + */ + +#include <linux/cpu.h> +#include <linux/debugfs.h> +#include <linux/interrupt.h> +#include <linux/kvm_host.h> +#include <linux/seq_file.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_mmu.h> +#include "vgic.h" + +/* + * Structure to control looping through the entire vgic state. We start at + * zero for each field and move upwards. So, if dist_id is 0 we print the + * distributor info. When dist_id is 1, we have already printed it and move + * on. + * + * When vcpu_id < nr_cpus we print the vcpu info until vcpu_id == nr_cpus and + * so on. 
+ */ +struct vgic_state_iter { + int nr_cpus; + int nr_spis; + int nr_lpis; + int dist_id; + int vcpu_id; + int intid; + int lpi_idx; + u32 *lpi_array; +}; + +static void iter_next(struct vgic_state_iter *iter) +{ + if (iter->dist_id == 0) { + iter->dist_id++; + return; + } + + iter->intid++; + if (iter->intid == VGIC_NR_PRIVATE_IRQS && + ++iter->vcpu_id < iter->nr_cpus) + iter->intid = 0; + + if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) { + if (iter->lpi_idx < iter->nr_lpis) + iter->intid = iter->lpi_array[iter->lpi_idx]; + iter->lpi_idx++; + } +} + +static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, + loff_t pos) +{ + int nr_cpus = atomic_read(&kvm->online_vcpus); + + memset(iter, 0, sizeof(*iter)); + + iter->nr_cpus = nr_cpus; + iter->nr_spis = kvm->arch.vgic.nr_spis; + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array); + if (iter->nr_lpis < 0) + iter->nr_lpis = 0; + } + + /* Fast forward to the right position if needed */ + while (pos--) + iter_next(iter); +} + +static bool end_of_vgic(struct vgic_state_iter *iter) +{ + return iter->dist_id > 0 && + iter->vcpu_id == iter->nr_cpus && + iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) && + iter->lpi_idx > iter->nr_lpis; +} + +static void *vgic_debug_start(struct seq_file *s, loff_t *pos) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter; + + mutex_lock(&kvm->lock); + iter = kvm->arch.vgic.iter; + if (iter) { + iter = ERR_PTR(-EBUSY); + goto out; + } + + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) { + iter = ERR_PTR(-ENOMEM); + goto out; + } + + iter_init(kvm, iter, *pos); + kvm->arch.vgic.iter = iter; + + if (end_of_vgic(iter)) + iter = NULL; +out: + mutex_unlock(&kvm->lock); + return iter; +} + +static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter = kvm->arch.vgic.iter; + + ++*pos; + iter_next(iter); + if (end_of_vgic(iter)) + iter = NULL; + return iter; +} + +static void vgic_debug_stop(struct seq_file *s, void *v) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter; + + /* + * If the seq file wasn't properly opened, there's nothing to clean + * up. + */ + if (IS_ERR(v)) + return; + + mutex_lock(&kvm->lock); + iter = kvm->arch.vgic.iter; + kfree(iter->lpi_array); + kfree(iter); + kvm->arch.vgic.iter = NULL; + mutex_unlock(&kvm->lock); +} + +static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) +{ + bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3; + + seq_printf(s, "Distributor\n"); + seq_printf(s, "===========\n"); + seq_printf(s, "vgic_model:\t%s\n", v3 ? 
"GICv3" : "GICv2"); + seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); + if (v3) + seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count); + seq_printf(s, "enabled:\t%d\n", dist->enabled); + seq_printf(s, "\n"); + + seq_printf(s, "P=pending_latch, L=line_level, A=active\n"); + seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n"); + seq_printf(s, "G=group\n"); +} + +static void print_header(struct seq_file *s, struct vgic_irq *irq, + struct kvm_vcpu *vcpu) +{ + int id = 0; + char *hdr = "SPI "; + + if (vcpu) { + hdr = "VCPU"; + id = vcpu->vcpu_id; + } + + seq_printf(s, "\n"); + seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHCG HWID TARGET SRC PRI VCPU_ID\n", hdr, id); + seq_printf(s, "----------------------------------------------------------------\n"); +} + +static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, + struct kvm_vcpu *vcpu) +{ + char *type; + bool pending; + + if (irq->intid < VGIC_NR_SGIS) + type = "SGI"; + else if (irq->intid < VGIC_NR_PRIVATE_IRQS) + type = "PPI"; + else if (irq->intid < VGIC_MAX_SPI) + type = "SPI"; + else + type = "LPI"; + + if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS) + print_header(s, irq, vcpu); + + pending = irq->pending_latch; + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + int err; + + err = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &pending); + WARN_ON_ONCE(err); + } + + seq_printf(s, " %s %4d " + " %2d " + "%d%d%d%d%d%d%d " + "%8d " + "%8x " + " %2x " + "%3d " + " %2d " + "\n", + type, irq->intid, + (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1, + pending, + irq->line_level, + irq->active, + irq->enabled, + irq->hw, + irq->config == VGIC_CONFIG_LEVEL, + irq->group, + irq->hwintid, + irq->mpidr, + irq->source, + irq->priority, + (irq->vcpu) ? irq->vcpu->vcpu_id : -1); +} + +static int vgic_debug_show(struct seq_file *s, void *v) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter = (struct vgic_state_iter *)v; + struct vgic_irq *irq; + struct kvm_vcpu *vcpu = NULL; + unsigned long flags; + + if (iter->dist_id == 0) { + print_dist_state(s, &kvm->arch.vgic); + return 0; + } + + if (!kvm->arch.vgic.initialized) + return 0; + + if (iter->vcpu_id < iter->nr_cpus) + vcpu = kvm_get_vcpu(kvm, iter->vcpu_id); + + irq = vgic_get_irq(kvm, vcpu, iter->intid); + if (!irq) { + seq_printf(s, " LPI %4d freed\n", iter->intid); + return 0; + } + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + print_irq_state(s, irq, vcpu); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(kvm, irq); + return 0; +} + +static const struct seq_operations vgic_debug_seq_ops = { + .start = vgic_debug_start, + .next = vgic_debug_next, + .stop = vgic_debug_stop, + .show = vgic_debug_show +}; + +static int debug_open(struct inode *inode, struct file *file) +{ + int ret; + ret = seq_open(file, &vgic_debug_seq_ops); + if (!ret) { + struct seq_file *seq; + /* seq_open will have modified file->private_data */ + seq = file->private_data; + seq->private = inode->i_private; + } + + return ret; +}; + +static const struct file_operations vgic_debug_fops = { + .owner = THIS_MODULE, + .open = debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +void vgic_debug_init(struct kvm *kvm) +{ + debugfs_create_file("vgic-state", 0444, kvm->debugfs_dentry, kvm, + &vgic_debug_fops); +} + +void vgic_debug_destroy(struct kvm *kvm) +{ +} diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c new file mode 100644 index 000000000000..32e32d67a127 --- 
/dev/null +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -0,0 +1,556 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ + +#include <linux/uaccess.h> +#include <linux/interrupt.h> +#include <linux/cpu.h> +#include <linux/kvm_host.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_mmu.h> +#include "vgic.h" + +/* + * Initialization rules: there are multiple stages to the vgic + * initialization, both for the distributor and the CPU interfaces. The basic + * idea is that even though the VGIC is not functional or not requested from + * user space, the critical path of the run loop can still call VGIC functions + * that just won't do anything, without them having to check additional + * initialization flags to ensure they don't look at uninitialized data + * structures. + * + * Distributor: + * + * - kvm_vgic_early_init(): initialization of static data that doesn't + * depend on any sizing information or emulation type. No allocation + * is allowed there. + * + * - vgic_init(): allocation and initialization of the generic data + * structures that depend on sizing information (number of CPUs, + * number of interrupts). Also initializes the vcpu specific data + * structures. Can be executed lazily for GICv2. + * + * CPU Interface: + * + * - kvm_vgic_vcpu_init(): initialization of static data that + * doesn't depend on any sizing information or emulation type. No + * allocation is allowed there. + */ + +/* EARLY INIT */ + +/** + * kvm_vgic_early_init() - Initialize static VGIC VCPU data structures + * @kvm: The VM whose VGIC distributor should be initialized + * + * Only do initialization of static structures that don't require any + * allocation or sizing information from userspace. vgic_init() calls + * kvm_vgic_dist_init(), which takes care of the rest. + */ +void kvm_vgic_early_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + INIT_LIST_HEAD(&dist->lpi_list_head); + INIT_LIST_HEAD(&dist->lpi_translation_cache); + raw_spin_lock_init(&dist->lpi_list_lock); +} + +/* CREATION */ + +/** + * kvm_vgic_create: triggered by the instantiation of the VGIC device by + * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only) + * or through the generic KVM_CREATE_DEVICE API ioctl. + * irqchip_in_kernel() tells you if this function succeeded or not. + * @kvm: kvm struct pointer + * @type: KVM_DEV_TYPE_ARM_VGIC_V[23] + */ +int kvm_vgic_create(struct kvm *kvm, u32 type) +{ + int i, ret; + struct kvm_vcpu *vcpu; + + if (irqchip_in_kernel(kvm)) + return -EEXIST; + + /* + * This function is also called by the KVM_CREATE_IRQCHIP handler, + * which had no chance yet to check the availability of the GICv2 + * emulation. So check this here again. KVM_CREATE_DEVICE does + * the proper checks already.
+ */ + if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && + !kvm_vgic_global_state.can_emulate_gicv2) + return -ENODEV; + + ret = -EBUSY; + if (!lock_all_vcpus(kvm)) + return ret; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->arch.has_run_once) + goto out_unlock; + } + ret = 0; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) + kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS; + else + kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS; + + if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) { + ret = -E2BIG; + goto out_unlock; + } + + kvm->arch.vgic.in_kernel = true; + kvm->arch.vgic.vgic_model = type; + + kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + else + INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); + +out_unlock: + unlock_all_vcpus(kvm); + return ret; +} + +/* INIT/DESTROY */ + +/** + * kvm_vgic_dist_init: initialize the dist data structures + * @kvm: kvm struct pointer + * @nr_spis: number of spis, frozen by caller + */ +static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); + int i; + + dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL); + if (!dist->spis) + return -ENOMEM; + + /* + * In the following code we do not take the irq struct lock since + * no other action on irq structs can happen while the VGIC is + * not initialized yet: + * If someone wants to inject an interrupt or does a MMIO access, we + * require prior initialization in case of a virtual GICv3 or trigger + * initialization when using a virtual GICv2. + */ + for (i = 0; i < nr_spis; i++) { + struct vgic_irq *irq = &dist->spis[i]; + + irq->intid = i + VGIC_NR_PRIVATE_IRQS; + INIT_LIST_HEAD(&irq->ap_list); + raw_spin_lock_init(&irq->irq_lock); + irq->vcpu = NULL; + irq->target_vcpu = vcpu0; + kref_init(&irq->refcount); + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V2: + irq->targets = 0; + irq->group = 0; + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: + irq->mpidr = 0; + irq->group = 1; + break; + default: + kfree(dist->spis); + dist->spis = NULL; + return -EINVAL; + } + } + return 0; +} + +/** + * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data + * structures and register VCPU-specific KVM iodevs + * + * @vcpu: pointer to the VCPU being created and initialized + * + * Only do initialization, but do not actually enable the + * VGIC CPU interface + */ +int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int ret = 0; + int i; + + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + + INIT_LIST_HEAD(&vgic_cpu->ap_list_head); + raw_spin_lock_init(&vgic_cpu->ap_list_lock); + atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0); + + /* + * Enable and configure all SGIs to be edge-triggered and + * configure all PPIs as level-triggered. 
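
The creation and initialization path above is driven entirely from user space. A minimal, hypothetical VMM snippet, using only the generic KVM device API named in the comments (KVM_CREATE_DEVICE plus the KVM_DEV_ARM_VGIC_* attribute groups from the KVM UAPI headers), might look like the following; it is an illustrative sketch under those assumptions, not part of the patch:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Create a GICv3 for the VM and trigger its explicit initialization. */
	static int create_vgic_v3(int vm_fd, __u64 dist_base, __u64 redist_base)
	{
		struct kvm_create_device cd = { .type = KVM_DEV_TYPE_ARM_VGIC_V3 };
		struct kvm_device_attr attr = { 0 };

		/* Ends up in kvm_vgic_create() via the KVM_CREATE_DEVICE ioctl. */
		if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
			return -1;

		/* Program the distributor and redistributor base addresses. */
		attr.group = KVM_DEV_ARM_VGIC_GRP_ADDR;
		attr.attr = KVM_VGIC_V3_ADDR_TYPE_DIST;
		attr.addr = (__u64)&dist_base;
		if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
			return -1;

		attr.attr = KVM_VGIC_V3_ADDR_TYPE_REDIST;
		attr.addr = (__u64)&redist_base;
		if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
			return -1;

		/* Explicit init for GICv3; this is what ends up in vgic_init(). */
		attr.group = KVM_DEV_ARM_VGIC_GRP_CTRL;
		attr.attr = KVM_DEV_ARM_VGIC_CTRL_INIT;
		attr.addr = 0;
		return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
	}
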
+ */ + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; + + INIT_LIST_HEAD(&irq->ap_list); + raw_spin_lock_init(&irq->irq_lock); + irq->intid = i; + irq->vcpu = NULL; + irq->target_vcpu = vcpu; + kref_init(&irq->refcount); + if (vgic_irq_is_sgi(i)) { + /* SGIs */ + irq->enabled = 1; + irq->config = VGIC_CONFIG_EDGE; + } else { + /* PPIs */ + irq->config = VGIC_CONFIG_LEVEL; + } + } + + if (!irqchip_in_kernel(vcpu->kvm)) + return 0; + + /* + * If we are creating a VCPU with a GICv3 we must also register the + * KVM io device for the redistributor that belongs to this VCPU. + */ + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + mutex_lock(&vcpu->kvm->lock); + ret = vgic_register_redist_iodev(vcpu); + mutex_unlock(&vcpu->kvm->lock); + } + return ret; +} + +static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_enable(vcpu); + else + vgic_v3_enable(vcpu); +} + +/* + * vgic_init: allocates and initializes dist and vcpu data structures + * depending on two dimensioning parameters: + * - the number of spis + * - the number of vcpus + * The function is generally called when nr_spis has been explicitly set + * by the guest through the KVM DEVICE API. If not, nr_spis is set to 256. + * vgic_initialized() returns true when this function has succeeded. + * Must be called with kvm->lock held! + */ +int vgic_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int ret = 0, i, idx; + + if (vgic_initialized(kvm)) + return 0; + + /* Are we also in the middle of creating a VCPU? */ + if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus)) + return -EBUSY; + + /* freeze the number of spis */ + if (!dist->nr_spis) + dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS; + + ret = kvm_vgic_dist_init(kvm, dist->nr_spis); + if (ret) + goto out; + + /* Initialize groups on CPUs created before the VGIC type was known */ + kvm_for_each_vcpu(idx, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V3: + irq->group = 1; + irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: + irq->group = 0; + irq->targets = 1U << idx; + break; + default: + ret = -EINVAL; + goto out; + } + } + } + + if (vgic_has_its(kvm)) + vgic_lpi_translation_cache_init(kvm); + + /* + * If we have GICv4.1 enabled, unconditionally enable the + * v4 support so that we get HW-accelerated vSGIs. Otherwise, only + * enable it if we present a virtual ITS to the guest.
+ */ + if (vgic_supports_direct_msis(kvm)) { + ret = vgic_v4_init(kvm); + if (ret) + goto out; + } + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vgic_vcpu_enable(vcpu); + + ret = kvm_vgic_setup_default_irq_routing(kvm); + if (ret) + goto out; + + vgic_debug_init(kvm); + + dist->implementation_rev = 2; + dist->initialized = true; + +out: + return ret; +} + +static void kvm_vgic_dist_destroy(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_redist_region *rdreg, *next; + + dist->ready = false; + dist->initialized = false; + + kfree(dist->spis); + dist->spis = NULL; + dist->nr_spis = 0; + + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) { + list_del(&rdreg->list); + kfree(rdreg); + } + INIT_LIST_HEAD(&dist->rd_regions); + } + + if (vgic_has_its(kvm)) + vgic_lpi_translation_cache_destroy(kvm); + + if (vgic_supports_direct_msis(kvm)) + vgic_v4_teardown(kvm); +} + +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + /* + * Retire all pending LPIs on this vcpu anyway as we're + * going to destroy it. + */ + vgic_flush_pending_lpis(vcpu); + + INIT_LIST_HEAD(&vgic_cpu->ap_list_head); +} + +/* To be called with kvm->lock held */ +static void __kvm_vgic_destroy(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int i; + + vgic_debug_destroy(kvm); + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vgic_vcpu_destroy(vcpu); + + kvm_vgic_dist_destroy(kvm); +} + +void kvm_vgic_destroy(struct kvm *kvm) +{ + mutex_lock(&kvm->lock); + __kvm_vgic_destroy(kvm); + mutex_unlock(&kvm->lock); +} + +/** + * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest + * is a GICv2. A GICv3 must be explicitly initialized by the guest using the + * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group. + * @kvm: kvm struct pointer + */ +int vgic_lazy_init(struct kvm *kvm) +{ + int ret = 0; + + if (unlikely(!vgic_initialized(kvm))) { + /* + * We only provide the automatic initialization of the VGIC + * for the legacy case of a GICv2. Any other type must + * be explicitly initialized once setup with the respective + * KVM device call. + */ + if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) + return -EBUSY; + + mutex_lock(&kvm->lock); + ret = vgic_init(kvm); + mutex_unlock(&kvm->lock); + } + + return ret; +} + +/* RESOURCE MAPPING */ + +/** + * Map the MMIO regions depending on the VGIC model exposed to the guest + * called on the first VCPU run. + * Also map the virtual CPU interface into the VM. + * v2/v3 derivatives call vgic_init if not already done. + * vgic_ready() returns true if this function has succeeded. 
+ * @kvm: kvm struct pointer + */ +int kvm_vgic_map_resources(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int ret = 0; + + mutex_lock(&kvm->lock); + if (!irqchip_in_kernel(kvm)) + goto out; + + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) + ret = vgic_v2_map_resources(kvm); + else + ret = vgic_v3_map_resources(kvm); + + if (ret) + __kvm_vgic_destroy(kvm); + +out: + mutex_unlock(&kvm->lock); + return ret; +} + +/* GENERIC PROBE */ + +static int vgic_init_cpu_starting(unsigned int cpu) +{ + enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0); + return 0; +} + + +static int vgic_init_cpu_dying(unsigned int cpu) +{ + disable_percpu_irq(kvm_vgic_global_state.maint_irq); + return 0; +} + +static irqreturn_t vgic_maintenance_handler(int irq, void *data) +{ + /* + * We cannot rely on the vgic maintenance interrupt to be + * delivered synchronously. This means we can only use it to + * exit the VM, and we perform the handling of EOIed + * interrupts on the exit path (see vgic_fold_lr_state). + */ + return IRQ_HANDLED; +} + +/** + * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware + * + * For a specific CPU, initialize the GIC VE hardware. + */ +void kvm_vgic_init_cpu_hardware(void) +{ + BUG_ON(preemptible()); + + /* + * We want to make sure the list registers start out clear so that we + * only have to program the used registers. + */ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_init_lrs(); + else + kvm_call_hyp(__vgic_v3_init_lrs); +} + +/** + * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable + * according to the host GIC model. Accordingly calls either + * vgic_v2/v3_probe which registers the KVM_DEVICE that can be + * instantiated by a guest later on. + */ +int kvm_vgic_hyp_init(void) +{ + const struct gic_kvm_info *gic_kvm_info; + int ret; + + gic_kvm_info = gic_get_kvm_info(); + if (!gic_kvm_info) + return -ENODEV; + + if (!gic_kvm_info->maint_irq) { + kvm_err("No vgic maintenance irq\n"); + return -ENXIO; + } + + switch (gic_kvm_info->type) { + case GIC_V2: + ret = vgic_v2_probe(gic_kvm_info); + break; + case GIC_V3: + ret = vgic_v3_probe(gic_kvm_info); + if (!ret) { + static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif); + kvm_info("GIC system register CPU interface enabled\n"); + } + break; + default: + ret = -ENODEV; + } + + if (ret) + return ret; + + kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; + ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, + vgic_maintenance_handler, + "vgic", kvm_get_running_vcpus()); + if (ret) { + kvm_err("Cannot register interrupt %d\n", + kvm_vgic_global_state.maint_irq); + return ret; + } + + ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, + "kvm/arm/vgic:starting", + vgic_init_cpu_starting, vgic_init_cpu_dying); + if (ret) { + kvm_err("Cannot register vgic CPU notifier\n"); + goto out_free_irq; + } + + kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq); + return 0; + +out_free_irq: + free_percpu_irq(kvm_vgic_global_state.maint_irq, + kvm_get_running_vcpus()); + return ret; +} diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c new file mode 100644 index 000000000000..d8cdfea5cc96 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015, 2016 ARM Ltd.
+ */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <trace/events/kvm.h> +#include <kvm/arm_vgic.h> +#include "vgic.h" + +/** + * vgic_irqfd_set_irq: inject the IRQ corresponding to the + * irqchip routing entry + * + * This is the entry point for irqfd IRQ injection + */ +static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS; + + if (!vgic_valid_spi(kvm, spi_id)) + return -EINVAL; + return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL); +} + +/** + * kvm_set_routing_entry: populate a kvm routing entry + * from a user routing entry + * + * @kvm: the VM this entry is applied to + * @e: kvm kernel routing entry handle + * @ue: user api routing entry handle + * return 0 on success, -EINVAL on errors. + */ +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = vgic_irqfd_set_irq; + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) || + (e->irqchip.irqchip >= KVM_NR_IRQCHIPS)) + goto out; + break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + e->msi.flags = ue->flags; + e->msi.devid = ue->u.msi.devid; + break; + default: + goto out; + } + r = 0; +out: + return r; +} + +static void kvm_populate_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm_msi *msi) +{ + msi->address_lo = e->msi.address_lo; + msi->address_hi = e->msi.address_hi; + msi->data = e->msi.data; + msi->flags = e->msi.flags; + msi->devid = e->msi.devid; +} +/** + * kvm_set_msi: inject the MSI corresponding to the + * MSI routing entry + * + * This is the entry point for irqfd MSI injection + * and userspace MSI injection. + */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + struct kvm_msi msi; + + if (!vgic_has_its(kvm)) + return -ENODEV; + + if (!level) + return -1; + + kvm_populate_msi(e, &msi); + return vgic_its_inject_msi(kvm, &msi); +} + +/** + * kvm_arch_set_irq_inatomic: fast-path for irqfd injection + * + * Currently only direct MSI injection is supported. 
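
For context, the vgic_irqfd_set_irq() path above is what an irqfd registered against an IRQCHIP routing entry ends up calling: with the default routing installed by kvm_vgic_setup_default_irq_routing() further down, GSI N maps to SPI N + 32 (VGIC_NR_PRIVATE_IRQS). A hypothetical user-space sketch, assuming only the standard KVM_IRQFD ioctl:

	#include <linux/kvm.h>
	#include <sys/eventfd.h>
	#include <sys/ioctl.h>

	/* Bind an eventfd to GSI @gsi; signalling it raises SPI (32 + gsi). */
	static int bind_spi_irqfd(int vm_fd, unsigned int gsi)
	{
		struct kvm_irqfd req = { .gsi = gsi };
		int efd = eventfd(0, EFD_CLOEXEC);

		if (efd < 0)
			return -1;

		req.fd = efd;
		if (ioctl(vm_fd, KVM_IRQFD, &req) < 0)
			return -1;

		return efd;	/* eventfd_write(efd, 1) injects the SPI */
	}
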
+ */ +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (e->type == KVM_IRQ_ROUTING_MSI && vgic_has_its(kvm) && level) { + struct kvm_msi msi; + + kvm_populate_msi(e, &msi); + if (!vgic_its_inject_cached_translation(kvm, &msi)) + return 0; + } + + return -EWOULDBLOCK; +} + +int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) +{ + struct kvm_irq_routing_entry *entries; + struct vgic_dist *dist = &kvm->arch.vgic; + u32 nr = dist->nr_spis; + int i, ret; + + entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + entries[i].gsi = i; + entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; + entries[i].u.irqchip.irqchip = 0; + entries[i].u.irqchip.pin = i; + } + ret = kvm_set_irq_routing(kvm, entries, nr, 0); + kfree(entries); + return ret; +} diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c new file mode 100644 index 000000000000..c012a52b19f5 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -0,0 +1,2783 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * GICv3 ITS emulation + * + * Copyright (C) 2015,2016 ARM Ltd. + * Author: Andre Przywara <andre.przywara@arm.com> + */ + +#include <linux/cpu.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/uaccess.h> +#include <linux/list_sort.h> + +#include <linux/irqchip/arm-gic-v3.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +static int vgic_its_save_tables_v0(struct vgic_its *its); +static int vgic_its_restore_tables_v0(struct vgic_its *its); +static int vgic_its_commit_v0(struct vgic_its *its); +static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, + struct kvm_vcpu *filter_vcpu, bool needs_inv); + +/* + * Creates a new (reference to a) struct vgic_irq for a given LPI. + * If this LPI is already mapped on another ITS, we increase its refcount + * and return a pointer to the existing structure. + * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq. + * This function returns a pointer to the _unlocked_ structure. + */ +static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, + struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; + unsigned long flags; + int ret; + + /* In this case there is no put, since we keep the reference. */ + if (irq) + return irq; + + irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL); + if (!irq) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&irq->lpi_list); + INIT_LIST_HEAD(&irq->ap_list); + raw_spin_lock_init(&irq->irq_lock); + + irq->config = VGIC_CONFIG_EDGE; + kref_init(&irq->refcount); + irq->intid = intid; + irq->target_vcpu = vcpu; + irq->group = 1; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + /* + * There could be a race with another vgic_add_lpi(), so we need to + * check that we don't add a second list entry with the same LPI. + */ + list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) { + if (oldirq->intid != intid) + continue; + + /* Someone was faster with adding this LPI, lets use that. */ + kfree(irq); + irq = oldirq; + + /* + * This increases the refcount, the caller is expected to + * call vgic_put_irq() on the returned pointer once it's + * finished with the IRQ. 
+ */ + vgic_get_irq_kref(irq); + + goto out_unlock; + } + + list_add_tail(&irq->lpi_list, &dist->lpi_list_head); + dist->lpi_list_count++; + +out_unlock: + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + /* + * We "cache" the configuration table entries in our struct vgic_irq's. + * However we only have those structs for mapped IRQs, so we read in + * the respective config data from memory here upon mapping the LPI. + * + * Should any of these fail, behave as if we couldn't create the LPI + * by dropping the refcount and returning the error. + */ + ret = update_lpi_config(kvm, irq, NULL, false); + if (ret) { + vgic_put_irq(kvm, irq); + return ERR_PTR(ret); + } + + ret = vgic_v3_lpi_sync_pending_status(kvm, irq); + if (ret) { + vgic_put_irq(kvm, irq); + return ERR_PTR(ret); + } + + return irq; +} + +struct its_device { + struct list_head dev_list; + + /* the head for the list of ITTEs */ + struct list_head itt_head; + u32 num_eventid_bits; + gpa_t itt_addr; + u32 device_id; +}; + +#define COLLECTION_NOT_MAPPED ((u32)~0) + +struct its_collection { + struct list_head coll_list; + + u32 collection_id; + u32 target_addr; +}; + +#define its_is_collection_mapped(coll) ((coll) && \ + ((coll)->target_addr != COLLECTION_NOT_MAPPED)) + +struct its_ite { + struct list_head ite_list; + + struct vgic_irq *irq; + struct its_collection *collection; + u32 event_id; +}; + +struct vgic_translation_cache_entry { + struct list_head entry; + phys_addr_t db; + u32 devid; + u32 eventid; + struct vgic_irq *irq; +}; + +/** + * struct vgic_its_abi - ITS abi ops and settings + * @cte_esz: collection table entry size + * @dte_esz: device table entry size + * @ite_esz: interrupt translation table entry size + * @save tables: save the ITS tables into guest RAM + * @restore_tables: restore the ITS internal structs from tables + * stored in guest RAM + * @commit: initialize the registers which expose the ABI settings, + * especially the entry sizes + */ +struct vgic_its_abi { + int cte_esz; + int dte_esz; + int ite_esz; + int (*save_tables)(struct vgic_its *its); + int (*restore_tables)(struct vgic_its *its); + int (*commit)(struct vgic_its *its); +}; + +#define ABI_0_ESZ 8 +#define ESZ_MAX ABI_0_ESZ + +static const struct vgic_its_abi its_table_abi_versions[] = { + [0] = { + .cte_esz = ABI_0_ESZ, + .dte_esz = ABI_0_ESZ, + .ite_esz = ABI_0_ESZ, + .save_tables = vgic_its_save_tables_v0, + .restore_tables = vgic_its_restore_tables_v0, + .commit = vgic_its_commit_v0, + }, +}; + +#define NR_ITS_ABIS ARRAY_SIZE(its_table_abi_versions) + +inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its) +{ + return &its_table_abi_versions[its->abi_rev]; +} + +static int vgic_its_set_abi(struct vgic_its *its, u32 rev) +{ + const struct vgic_its_abi *abi; + + its->abi_rev = rev; + abi = vgic_its_get_abi(its); + return abi->commit(its); +} + +/* + * Find and returns a device in the device table for an ITS. + * Must be called with the its_lock mutex held. + */ +static struct its_device *find_its_device(struct vgic_its *its, u32 device_id) +{ + struct its_device *device; + + list_for_each_entry(device, &its->device_list, dev_list) + if (device_id == device->device_id) + return device; + + return NULL; +} + +/* + * Find and returns an interrupt translation table entry (ITTE) for a given + * Device ID/Event ID pair on an ITS. + * Must be called with the its_lock mutex held. 
+ */ +static struct its_ite *find_ite(struct vgic_its *its, u32 device_id, + u32 event_id) +{ + struct its_device *device; + struct its_ite *ite; + + device = find_its_device(its, device_id); + if (device == NULL) + return NULL; + + list_for_each_entry(ite, &device->itt_head, ite_list) + if (ite->event_id == event_id) + return ite; + + return NULL; +} + +/* To be used as an iterator this macro misses the enclosing parentheses */ +#define for_each_lpi_its(dev, ite, its) \ + list_for_each_entry(dev, &(its)->device_list, dev_list) \ + list_for_each_entry(ite, &(dev)->itt_head, ite_list) + +#define GIC_LPI_OFFSET 8192 + +#define VITS_TYPER_IDBITS 16 +#define VITS_TYPER_DEVBITS 16 +#define VITS_DTE_MAX_DEVID_OFFSET (BIT(14) - 1) +#define VITS_ITE_MAX_EVENTID_OFFSET (BIT(16) - 1) + +/* + * Finds and returns a collection in the ITS collection table. + * Must be called with the its_lock mutex held. + */ +static struct its_collection *find_collection(struct vgic_its *its, int coll_id) +{ + struct its_collection *collection; + + list_for_each_entry(collection, &its->collection_list, coll_list) { + if (coll_id == collection->collection_id) + return collection; + } + + return NULL; +} + +#define LPI_PROP_ENABLE_BIT(p) ((p) & LPI_PROP_ENABLED) +#define LPI_PROP_PRIORITY(p) ((p) & 0xfc) + +/* + * Reads the configuration data for a given LPI from guest memory and + * updates the fields in struct vgic_irq. + * If filter_vcpu is not NULL, applies only if the IRQ is targeting this + * VCPU. Unconditionally applies if filter_vcpu is NULL. + */ +static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, + struct kvm_vcpu *filter_vcpu, bool needs_inv) +{ + u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); + u8 prop; + int ret; + unsigned long flags; + + ret = kvm_read_guest_lock(kvm, propbase + irq->intid - GIC_LPI_OFFSET, + &prop, 1); + + if (ret) + return ret; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (!filter_vcpu || filter_vcpu == irq->target_vcpu) { + irq->priority = LPI_PROP_PRIORITY(prop); + irq->enabled = LPI_PROP_ENABLE_BIT(prop); + + if (!irq->hw) { + vgic_queue_irq_unlock(kvm, irq, flags); + return 0; + } + } + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + if (irq->hw) + return its_prop_update_vlpi(irq->host_irq, prop, needs_inv); + + return 0; +} + +/* + * Create a snapshot of the current LPIs targeting @vcpu, so that we can + * enumerate those LPIs without holding any lock. + * Returns their number and puts the kmalloc'ed array into intid_ptr. + */ +int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + unsigned long flags; + u32 *intids; + int irq_count, i = 0; + + /* + * There is an obvious race between allocating the array and LPIs + * being mapped/unmapped. If we ended up here as a result of a + * command, we're safe (locks are held, preventing another + * command). If coming from another path (such as enabling LPIs), + * we must be careful not to overrun the array. + */ + irq_count = READ_ONCE(dist->lpi_list_count); + intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); + if (!intids) + return -ENOMEM; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + if (i == irq_count) + break; + /* We don't need to "get" the IRQ, as we hold the list lock. 
*/ + if (vcpu && irq->target_vcpu != vcpu) + continue; + intids[i++] = irq->intid; + } + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + *intid_ptr = intids; + return i; +} + +static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) +{ + int ret = 0; + unsigned long flags; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->target_vcpu = vcpu; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + if (irq->hw) { + struct its_vlpi_map map; + + ret = its_get_vlpi(irq->host_irq, &map); + if (ret) + return ret; + + if (map.vpe) + atomic_dec(&map.vpe->vlpi_count); + map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + atomic_inc(&map.vpe->vlpi_count); + + ret = its_map_vlpi(irq->host_irq, &map); + } + + return ret; +} + +/* + * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI + * is targeting) to the VGIC's view, which deals with target VCPUs. + * Needs to be called whenever either the collection for a LPIs has + * changed or the collection itself got retargeted. + */ +static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite) +{ + struct kvm_vcpu *vcpu; + + if (!its_is_collection_mapped(ite->collection)) + return; + + vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); + update_affinity(ite->irq, vcpu); +} + +/* + * Updates the target VCPU for every LPI targeting this collection. + * Must be called with the its_lock mutex held. + */ +static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its, + struct its_collection *coll) +{ + struct its_device *device; + struct its_ite *ite; + + for_each_lpi_its(device, ite, its) { + if (!ite->collection || coll != ite->collection) + continue; + + update_affinity_ite(kvm, ite); + } +} + +static u32 max_lpis_propbaser(u64 propbaser) +{ + int nr_idbits = (propbaser & 0x1f) + 1; + + return 1U << min(nr_idbits, INTERRUPT_ID_BITS_ITS); +} + +/* + * Sync the pending table pending bit of LPIs targeting @vcpu + * with our own data structures. This relies on the LPI being + * mapped before. + */ +static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) +{ + gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + struct vgic_irq *irq; + int last_byte_offset = -1; + int ret = 0; + u32 *intids; + int nr_irqs, i; + unsigned long flags; + u8 pendmask; + + nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids); + if (nr_irqs < 0) + return nr_irqs; + + for (i = 0; i < nr_irqs; i++) { + int byte_offset, bit_nr; + + byte_offset = intids[i] / BITS_PER_BYTE; + bit_nr = intids[i] % BITS_PER_BYTE; + + /* + * For contiguously allocated LPIs chances are we just read + * this very same byte in the last iteration. Reuse that. + */ + if (byte_offset != last_byte_offset) { + ret = kvm_read_guest_lock(vcpu->kvm, + pendbase + byte_offset, + &pendmask, 1); + if (ret) { + kfree(intids); + return ret; + } + last_byte_offset = byte_offset; + } + + irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = pendmask & (1U << bit_nr); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + vgic_put_irq(vcpu->kvm, irq); + } + + kfree(intids); + + return ret; +} + +static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 reg = GITS_TYPER_PLPIS; + + /* + * We use linear CPU numbers for redistributor addressing, + * so GITS_TYPER.PTA is 0. 
+ * Also we force all PROPBASER registers to be the same, so + * CommonLPIAff is 0 as well. + * To avoid memory waste in the guest, we keep the number of IDBits and + * DevBits low - as least for the time being. + */ + reg |= GIC_ENCODE_SZ(VITS_TYPER_DEVBITS, 5) << GITS_TYPER_DEVBITS_SHIFT; + reg |= GIC_ENCODE_SZ(VITS_TYPER_IDBITS, 5) << GITS_TYPER_IDBITS_SHIFT; + reg |= GIC_ENCODE_SZ(abi->ite_esz, 4) << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT; + + return extract_bytes(reg, addr & 7, len); +} + +static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 val; + + val = (its->abi_rev << GITS_IIDR_REV_SHIFT) & GITS_IIDR_REV_MASK; + val |= (PRODUCT_ID_KVM << GITS_IIDR_PRODUCTID_SHIFT) | IMPLEMENTER_ARM; + return val; +} + +static int vgic_mmio_uaccess_write_its_iidr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 rev = GITS_IIDR_REV(val); + + if (rev >= NR_ITS_ABIS) + return -EINVAL; + return vgic_its_set_abi(its, rev); +} + +static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + switch (addr & 0xffff) { + case GITS_PIDR0: + return 0x92; /* part number, bits[7:0] */ + case GITS_PIDR1: + return 0xb4; /* part number, bits[11:8] */ + case GITS_PIDR2: + return GIC_PIDR2_ARCH_GICv3 | 0x0b; + case GITS_PIDR4: + return 0x40; /* This is a 64K software visible page */ + /* The following are the ID registers for (any) GIC. */ + case GITS_CIDR0: + return 0x0d; + case GITS_CIDR1: + return 0xf0; + case GITS_CIDR2: + return 0x05; + case GITS_CIDR3: + return 0xb1; + } + + return 0; +} + +static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist, + phys_addr_t db, + u32 devid, u32 eventid) +{ + struct vgic_translation_cache_entry *cte; + + list_for_each_entry(cte, &dist->lpi_translation_cache, entry) { + /* + * If we hit a NULL entry, there is nothing after this + * point. + */ + if (!cte->irq) + break; + + if (cte->db != db || cte->devid != devid || + cte->eventid != eventid) + continue; + + /* + * Move this entry to the head, as it is the most + * recently used. 
+ */ + if (!list_is_first(&cte->entry, &dist->lpi_translation_cache)) + list_move(&cte->entry, &dist->lpi_translation_cache); + + return cte->irq; + } + + return NULL; +} + +static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db, + u32 devid, u32 eventid) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + unsigned long flags; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + irq = __vgic_its_check_cache(dist, db, devid, eventid); + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + return irq; +} + +static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid, + struct vgic_irq *irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_translation_cache_entry *cte; + unsigned long flags; + phys_addr_t db; + + /* Do not cache a directly injected interrupt */ + if (irq->hw) + return; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + if (unlikely(list_empty(&dist->lpi_translation_cache))) + goto out; + + /* + * We could have raced with another CPU caching the same + * translation behind our back, so let's check it is not in + * already + */ + db = its->vgic_its_base + GITS_TRANSLATER; + if (__vgic_its_check_cache(dist, db, devid, eventid)) + goto out; + + /* Always reuse the last entry (LRU policy) */ + cte = list_last_entry(&dist->lpi_translation_cache, + typeof(*cte), entry); + + /* + * Caching the translation implies having an extra reference + * to the interrupt, so drop the potential reference on what + * was in the cache, and increment it on the new interrupt. + */ + if (cte->irq) + __vgic_put_lpi_locked(kvm, cte->irq); + + vgic_get_irq_kref(irq); + + cte->db = db; + cte->devid = devid; + cte->eventid = eventid; + cte->irq = irq; + + /* Move the new translation to the head of the list */ + list_move(&cte->entry, &dist->lpi_translation_cache); + +out: + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); +} + +void vgic_its_invalidate_cache(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_translation_cache_entry *cte; + unsigned long flags; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + list_for_each_entry(cte, &dist->lpi_translation_cache, entry) { + /* + * If we hit a NULL entry, there is nothing after this + * point. 
+ */ + if (!cte->irq) + break; + + __vgic_put_lpi_locked(kvm, cte->irq); + cte->irq = NULL; + } + + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); +} + +int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid, struct vgic_irq **irq) +{ + struct kvm_vcpu *vcpu; + struct its_ite *ite; + + if (!its->enabled) + return -EBUSY; + + ite = find_ite(its, devid, eventid); + if (!ite || !its_is_collection_mapped(ite->collection)) + return E_ITS_INT_UNMAPPED_INTERRUPT; + + vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); + if (!vcpu) + return E_ITS_INT_UNMAPPED_INTERRUPT; + + if (!vcpu->arch.vgic_cpu.lpis_enabled) + return -EBUSY; + + vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq); + + *irq = ite->irq; + return 0; +} + +struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi) +{ + u64 address; + struct kvm_io_device *kvm_io_dev; + struct vgic_io_device *iodev; + + if (!vgic_has_its(kvm)) + return ERR_PTR(-ENODEV); + + if (!(msi->flags & KVM_MSI_VALID_DEVID)) + return ERR_PTR(-EINVAL); + + address = (u64)msi->address_hi << 32 | msi->address_lo; + + kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); + if (!kvm_io_dev) + return ERR_PTR(-EINVAL); + + if (kvm_io_dev->ops != &kvm_io_gic_ops) + return ERR_PTR(-EINVAL); + + iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); + if (iodev->iodev_type != IODEV_ITS) + return ERR_PTR(-EINVAL); + + return iodev->its; +} + +/* + * Find the target VCPU and the LPI number for a given devid/eventid pair + * and make this IRQ pending, possibly injecting it. + * Must be called with the its_lock mutex held. + * Returns 0 on success, a positive error value for any ITS mapping + * related errors and negative error values for generic errors. + */ +static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid) +{ + struct vgic_irq *irq = NULL; + unsigned long flags; + int err; + + err = vgic_its_resolve_lpi(kvm, its, devid, eventid, &irq); + if (err) + return err; + + if (irq->hw) + return irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, true); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + vgic_queue_irq_unlock(kvm, irq, flags); + + return 0; +} + +int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi) +{ + struct vgic_irq *irq; + unsigned long flags; + phys_addr_t db; + + db = (u64)msi->address_hi << 32 | msi->address_lo; + irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data); + + if (!irq) + return -1; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + vgic_queue_irq_unlock(kvm, irq, flags); + + return 0; +} + +/* + * Queries the KVM IO bus framework to get the ITS pointer from the given + * doorbell address. + * We then call vgic_its_trigger_msi() with the decoded data. + * According to the KVM_SIGNAL_MSI API description returns 1 on success. + */ +int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + struct vgic_its *its; + int ret; + + if (!vgic_its_inject_cached_translation(kvm, msi)) + return 1; + + its = vgic_msi_to_its(kvm, msi); + if (IS_ERR(its)) + return PTR_ERR(its); + + mutex_lock(&its->its_lock); + ret = vgic_its_trigger_msi(kvm, its, msi->devid, msi->data); + mutex_unlock(&its->its_lock); + + if (ret < 0) + return ret; + + /* + * KVM_SIGNAL_MSI demands a return value > 0 for success and 0 + * if the guest has blocked the MSI. So we map any LPI mapping + * related error to that. 
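
The KVM_SIGNAL_MSI semantics referred to here are what a VMM relies on when it injects a device MSI itself. A small, hypothetical sketch (the doorbell argument is the guest-physical address of the ITS GITS_TRANSLATER register, and the device ID must be flagged as valid):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Returns 1 when delivered, 0 if the guest blocked it, < 0 on error. */
	static int inject_its_msi(int vm_fd, __u64 doorbell, __u32 devid, __u32 eventid)
	{
		struct kvm_msi msi = {
			.address_lo = (__u32)doorbell,
			.address_hi = (__u32)(doorbell >> 32),
			.data       = eventid,
			.devid      = devid,
			.flags      = KVM_MSI_VALID_DEVID,
		};

		return ioctl(vm_fd, KVM_SIGNAL_MSI, &msi);
	}
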
+ */ + if (ret) + return 0; + else + return 1; +} + +/* Requires the its_lock to be held. */ +static void its_free_ite(struct kvm *kvm, struct its_ite *ite) +{ + list_del(&ite->ite_list); + + /* This put matches the get in vgic_add_lpi. */ + if (ite->irq) { + if (ite->irq->hw) + WARN_ON(its_unmap_vlpi(ite->irq->host_irq)); + + vgic_put_irq(kvm, ite->irq); + } + + kfree(ite); +} + +static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size) +{ + return (le64_to_cpu(its_cmd[word]) >> shift) & (BIT_ULL(size) - 1); +} + +#define its_cmd_get_command(cmd) its_cmd_mask_field(cmd, 0, 0, 8) +#define its_cmd_get_deviceid(cmd) its_cmd_mask_field(cmd, 0, 32, 32) +#define its_cmd_get_size(cmd) (its_cmd_mask_field(cmd, 1, 0, 5) + 1) +#define its_cmd_get_id(cmd) its_cmd_mask_field(cmd, 1, 0, 32) +#define its_cmd_get_physical_id(cmd) its_cmd_mask_field(cmd, 1, 32, 32) +#define its_cmd_get_collection(cmd) its_cmd_mask_field(cmd, 2, 0, 16) +#define its_cmd_get_ittaddr(cmd) (its_cmd_mask_field(cmd, 2, 8, 44) << 8) +#define its_cmd_get_target_addr(cmd) its_cmd_mask_field(cmd, 2, 16, 32) +#define its_cmd_get_validbit(cmd) its_cmd_mask_field(cmd, 2, 63, 1) + +/* + * The DISCARD command frees an Interrupt Translation Table Entry (ITTE). + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_ite *ite; + + ite = find_ite(its, device_id, event_id); + if (ite && its_is_collection_mapped(ite->collection)) { + /* + * Though the spec talks about removing the pending state, we + * don't bother here since we clear the ITTE anyway and the + * pending state is a property of the ITTE struct. + */ + vgic_its_invalidate_cache(kvm); + + its_free_ite(kvm, ite); + return 0; + } + + return E_ITS_DISCARD_UNMAPPED_INTERRUPT; +} + +/* + * The MOVI command moves an ITTE to a different collection. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + u32 coll_id = its_cmd_get_collection(its_cmd); + struct kvm_vcpu *vcpu; + struct its_ite *ite; + struct its_collection *collection; + + ite = find_ite(its, device_id, event_id); + if (!ite) + return E_ITS_MOVI_UNMAPPED_INTERRUPT; + + if (!its_is_collection_mapped(ite->collection)) + return E_ITS_MOVI_UNMAPPED_COLLECTION; + + collection = find_collection(its, coll_id); + if (!its_is_collection_mapped(collection)) + return E_ITS_MOVI_UNMAPPED_COLLECTION; + + ite->collection = collection; + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + vgic_its_invalidate_cache(kvm); + + return update_affinity(ite->irq, vcpu); +} + +/* + * Check whether an ID can be stored into the corresponding guest table. + * For a direct table this is pretty easy, but gets a bit nasty for + * indirect tables. We check whether the resulting guest physical address + * is actually valid (covered by a memslot and guest accessible). + * For this we have to read the respective first level entry. 
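
As a worked example of the two-level case (using the 8-byte ABI-0 entry size defined above): one 64K level-2 page holds 65536 / 8 = 8192 entries, so device ID 20000 selects level-1 index 20000 / 8192 = 2, and offset (20000 % 8192) * 8 = 28928 bytes into the level-2 page that the level-1 entry points to.
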
+ */ +static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, + gpa_t *eaddr) +{ + int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + u64 indirect_ptr, type = GITS_BASER_TYPE(baser); + phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser); + int esz = GITS_BASER_ENTRY_SIZE(baser); + int index, idx; + gfn_t gfn; + bool ret; + + switch (type) { + case GITS_BASER_TYPE_DEVICE: + if (id >= BIT_ULL(VITS_TYPER_DEVBITS)) + return false; + break; + case GITS_BASER_TYPE_COLLECTION: + /* as GITS_TYPER.CIL == 0, ITS supports 16-bit collection ID */ + if (id >= BIT_ULL(16)) + return false; + break; + default: + return false; + } + + if (!(baser & GITS_BASER_INDIRECT)) { + phys_addr_t addr; + + if (id >= (l1_tbl_size / esz)) + return false; + + addr = base + id * esz; + gfn = addr >> PAGE_SHIFT; + + if (eaddr) + *eaddr = addr; + + goto out; + } + + /* calculate and check the index into the 1st level */ + index = id / (SZ_64K / esz); + if (index >= (l1_tbl_size / sizeof(u64))) + return false; + + /* Each 1st level entry is represented by a 64-bit value. */ + if (kvm_read_guest_lock(its->dev->kvm, + base + index * sizeof(indirect_ptr), + &indirect_ptr, sizeof(indirect_ptr))) + return false; + + indirect_ptr = le64_to_cpu(indirect_ptr); + + /* check the valid bit of the first level entry */ + if (!(indirect_ptr & BIT_ULL(63))) + return false; + + /* Mask the guest physical address and calculate the frame number. */ + indirect_ptr &= GENMASK_ULL(51, 16); + + /* Find the address of the actual entry */ + index = id % (SZ_64K / esz); + indirect_ptr += index * esz; + gfn = indirect_ptr >> PAGE_SHIFT; + + if (eaddr) + *eaddr = indirect_ptr; + +out: + idx = srcu_read_lock(&its->dev->kvm->srcu); + ret = kvm_is_visible_gfn(its->dev->kvm, gfn); + srcu_read_unlock(&its->dev->kvm->srcu, idx); + return ret; +} + +static int vgic_its_alloc_collection(struct vgic_its *its, + struct its_collection **colp, + u32 coll_id) +{ + struct its_collection *collection; + + if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL)) + return E_ITS_MAPC_COLLECTION_OOR; + + collection = kzalloc(sizeof(*collection), GFP_KERNEL); + if (!collection) + return -ENOMEM; + + collection->collection_id = coll_id; + collection->target_addr = COLLECTION_NOT_MAPPED; + + list_add_tail(&collection->coll_list, &its->collection_list); + *colp = collection; + + return 0; +} + +static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id) +{ + struct its_collection *collection; + struct its_device *device; + struct its_ite *ite; + + /* + * Clearing the mapping for that collection ID removes the + * entry from the list. If there wasn't any before, we can + * go home early. + */ + collection = find_collection(its, coll_id); + if (!collection) + return; + + for_each_lpi_its(device, ite, its) + if (ite->collection && + ite->collection->collection_id == coll_id) + ite->collection = NULL; + + list_del(&collection->coll_list); + kfree(collection); +} + +/* Must be called with its_lock mutex held */ +static struct its_ite *vgic_its_alloc_ite(struct its_device *device, + struct its_collection *collection, + u32 event_id) +{ + struct its_ite *ite; + + ite = kzalloc(sizeof(*ite), GFP_KERNEL); + if (!ite) + return ERR_PTR(-ENOMEM); + + ite->event_id = event_id; + ite->collection = collection; + + list_add_tail(&ite->ite_list, &device->itt_head); + return ite; +} + +/* + * The MAPTI and MAPI commands map LPIs to ITTEs. + * Must be called with its_lock mutex held. 
+ */ +static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + u32 coll_id = its_cmd_get_collection(its_cmd); + struct its_ite *ite; + struct kvm_vcpu *vcpu = NULL; + struct its_device *device; + struct its_collection *collection, *new_coll = NULL; + struct vgic_irq *irq; + int lpi_nr; + + device = find_its_device(its, device_id); + if (!device) + return E_ITS_MAPTI_UNMAPPED_DEVICE; + + if (event_id >= BIT_ULL(device->num_eventid_bits)) + return E_ITS_MAPTI_ID_OOR; + + if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI) + lpi_nr = its_cmd_get_physical_id(its_cmd); + else + lpi_nr = event_id; + if (lpi_nr < GIC_LPI_OFFSET || + lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser)) + return E_ITS_MAPTI_PHYSICALID_OOR; + + /* If there is an existing mapping, behavior is UNPREDICTABLE. */ + if (find_ite(its, device_id, event_id)) + return 0; + + collection = find_collection(its, coll_id); + if (!collection) { + int ret = vgic_its_alloc_collection(its, &collection, coll_id); + if (ret) + return ret; + new_coll = collection; + } + + ite = vgic_its_alloc_ite(device, collection, event_id); + if (IS_ERR(ite)) { + if (new_coll) + vgic_its_free_collection(its, coll_id); + return PTR_ERR(ite); + } + + if (its_is_collection_mapped(collection)) + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + irq = vgic_add_lpi(kvm, lpi_nr, vcpu); + if (IS_ERR(irq)) { + if (new_coll) + vgic_its_free_collection(its, coll_id); + its_free_ite(kvm, ite); + return PTR_ERR(irq); + } + ite->irq = irq; + + return 0; +} + +/* Requires the its_lock to be held. */ +static void vgic_its_free_device(struct kvm *kvm, struct its_device *device) +{ + struct its_ite *ite, *temp; + + /* + * The spec says that unmapping a device with still valid + * ITTEs associated is UNPREDICTABLE. We remove all ITTEs, + * since we cannot leave the memory unreferenced. + */ + list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list) + its_free_ite(kvm, ite); + + vgic_its_invalidate_cache(kvm); + + list_del(&device->dev_list); + kfree(device); +} + +/* its lock must be held */ +static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its) +{ + struct its_device *cur, *temp; + + list_for_each_entry_safe(cur, temp, &its->device_list, dev_list) + vgic_its_free_device(kvm, cur); +} + +/* its lock must be held */ +static void vgic_its_free_collection_list(struct kvm *kvm, struct vgic_its *its) +{ + struct its_collection *cur, *temp; + + list_for_each_entry_safe(cur, temp, &its->collection_list, coll_list) + vgic_its_free_collection(its, cur->collection_id); +} + +/* Must be called with its_lock mutex held */ +static struct its_device *vgic_its_alloc_device(struct vgic_its *its, + u32 device_id, gpa_t itt_addr, + u8 num_eventid_bits) +{ + struct its_device *device; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + return ERR_PTR(-ENOMEM); + + device->device_id = device_id; + device->itt_addr = itt_addr; + device->num_eventid_bits = num_eventid_bits; + INIT_LIST_HEAD(&device->itt_head); + + list_add_tail(&device->dev_list, &its->device_list); + return device; +} + +/* + * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs). + * Must be called with the its_lock mutex held. 
+ */ +static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + bool valid = its_cmd_get_validbit(its_cmd); + u8 num_eventid_bits = its_cmd_get_size(its_cmd); + gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd); + struct its_device *device; + + if (!vgic_its_check_id(its, its->baser_device_table, device_id, NULL)) + return E_ITS_MAPD_DEVICE_OOR; + + if (valid && num_eventid_bits > VITS_TYPER_IDBITS) + return E_ITS_MAPD_ITTSIZE_OOR; + + device = find_its_device(its, device_id); + + /* + * The spec says that calling MAPD on an already mapped device + * invalidates all cached data for this device. We implement this + * by removing the mapping and re-establishing it. + */ + if (device) + vgic_its_free_device(kvm, device); + + /* + * The spec does not say whether unmapping a not-mapped device + * is an error, so we are done in any case. + */ + if (!valid) + return 0; + + device = vgic_its_alloc_device(its, device_id, itt_addr, + num_eventid_bits); + + return PTR_ERR_OR_ZERO(device); +} + +/* + * The MAPC command maps collection IDs to redistributors. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u16 coll_id; + u32 target_addr; + struct its_collection *collection; + bool valid; + + valid = its_cmd_get_validbit(its_cmd); + coll_id = its_cmd_get_collection(its_cmd); + target_addr = its_cmd_get_target_addr(its_cmd); + + if (target_addr >= atomic_read(&kvm->online_vcpus)) + return E_ITS_MAPC_PROCNUM_OOR; + + if (!valid) { + vgic_its_free_collection(its, coll_id); + vgic_its_invalidate_cache(kvm); + } else { + collection = find_collection(its, coll_id); + + if (!collection) { + int ret; + + ret = vgic_its_alloc_collection(its, &collection, + coll_id); + if (ret) + return ret; + collection->target_addr = target_addr; + } else { + collection->target_addr = target_addr; + update_affinity_collection(kvm, its, collection); + } + } + + return 0; +} + +/* + * The CLEAR command removes the pending state for a particular LPI. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_ite *ite; + + + ite = find_ite(its, device_id, event_id); + if (!ite) + return E_ITS_CLEAR_UNMAPPED_INTERRUPT; + + ite->irq->pending_latch = false; + + if (ite->irq->hw) + return irq_set_irqchip_state(ite->irq->host_irq, + IRQCHIP_STATE_PENDING, false); + + return 0; +} + +/* + * The INV command syncs the configuration bits from the memory table. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_ite *ite; + + + ite = find_ite(its, device_id, event_id); + if (!ite) + return E_ITS_INV_UNMAPPED_INTERRUPT; + + return update_lpi_config(kvm, ite->irq, NULL, true); +} + +/* + * The INVALL command requests flushing of all IRQ data in this collection. + * Find the VCPU mapped to that collection, then iterate over the VM's list + * of mapped LPIs and update the configuration for each IRQ which targets + * the specified vcpu. The configuration will be read from the in-memory + * configuration table. + * Must be called with the its_lock mutex held. 
+ */ +static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 coll_id = its_cmd_get_collection(its_cmd); + struct its_collection *collection; + struct kvm_vcpu *vcpu; + struct vgic_irq *irq; + u32 *intids; + int irq_count, i; + + collection = find_collection(its, coll_id); + if (!its_is_collection_mapped(collection)) + return E_ITS_INVALL_UNMAPPED_COLLECTION; + + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids); + if (irq_count < 0) + return irq_count; + + for (i = 0; i < irq_count; i++) { + irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; + update_lpi_config(kvm, irq, vcpu, false); + vgic_put_irq(kvm, irq); + } + + kfree(intids); + + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm) + its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe); + + return 0; +} + +/* + * The MOVALL command moves the pending state of all IRQs targeting one + * redistributor to another. We don't hold the pending state in the VCPUs, + * but in the IRQs instead, so there is really not much to do for us here. + * However the spec says that no IRQ must target the old redistributor + * afterwards, so we make sure that no LPI is using the associated target_vcpu. + * This command affects all LPIs in the system that target that redistributor. + */ +static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 target1_addr = its_cmd_get_target_addr(its_cmd); + u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32); + struct kvm_vcpu *vcpu1, *vcpu2; + struct vgic_irq *irq; + u32 *intids; + int irq_count, i; + + if (target1_addr >= atomic_read(&kvm->online_vcpus) || + target2_addr >= atomic_read(&kvm->online_vcpus)) + return E_ITS_MOVALL_PROCNUM_OOR; + + if (target1_addr == target2_addr) + return 0; + + vcpu1 = kvm_get_vcpu(kvm, target1_addr); + vcpu2 = kvm_get_vcpu(kvm, target2_addr); + + irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids); + if (irq_count < 0) + return irq_count; + + for (i = 0; i < irq_count; i++) { + irq = vgic_get_irq(kvm, NULL, intids[i]); + + update_affinity(irq, vcpu2); + + vgic_put_irq(kvm, irq); + } + + vgic_its_invalidate_cache(kvm); + + kfree(intids); + return 0; +} + +/* + * The INT command injects the LPI associated with that DevID/EvID pair. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 msi_data = its_cmd_get_id(its_cmd); + u64 msi_devid = its_cmd_get_deviceid(its_cmd); + + return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data); +} + +/* + * This function is called with the its_cmd lock held, but the ITS data + * structure lock dropped. 
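
Each command dispatched below is a single 32-byte (4 x 64-bit, little-endian) entry read from the guest's command queue. As an illustration of the layout that the its_cmd_get_*() extractors above decode, a hypothetical guest-side encoder for MAPTI (assuming GITS_CMD_MAPTI and the byte-order helpers from the GICv3 headers) could be:

	static void example_encode_mapti(__le64 cmd[4], u32 devid, u32 eventid,
					 u32 lpi_nr, u16 coll_id)
	{
		/* DW0: command number in bits [7:0], DeviceID in bits [63:32] */
		cmd[0] = cpu_to_le64(((u64)devid << 32) | GITS_CMD_MAPTI);
		/* DW1: EventID in bits [31:0], physical LPI number in bits [63:32] */
		cmd[1] = cpu_to_le64(((u64)lpi_nr << 32) | eventid);
		/* DW2: collection ID in bits [15:0] */
		cmd[2] = cpu_to_le64(coll_id);
		cmd[3] = cpu_to_le64(0);
	}
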
+ */ +static int vgic_its_handle_command(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + int ret = -ENODEV; + + mutex_lock(&its->its_lock); + switch (its_cmd_get_command(its_cmd)) { + case GITS_CMD_MAPD: + ret = vgic_its_cmd_handle_mapd(kvm, its, its_cmd); + break; + case GITS_CMD_MAPC: + ret = vgic_its_cmd_handle_mapc(kvm, its, its_cmd); + break; + case GITS_CMD_MAPI: + ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd); + break; + case GITS_CMD_MAPTI: + ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd); + break; + case GITS_CMD_MOVI: + ret = vgic_its_cmd_handle_movi(kvm, its, its_cmd); + break; + case GITS_CMD_DISCARD: + ret = vgic_its_cmd_handle_discard(kvm, its, its_cmd); + break; + case GITS_CMD_CLEAR: + ret = vgic_its_cmd_handle_clear(kvm, its, its_cmd); + break; + case GITS_CMD_MOVALL: + ret = vgic_its_cmd_handle_movall(kvm, its, its_cmd); + break; + case GITS_CMD_INT: + ret = vgic_its_cmd_handle_int(kvm, its, its_cmd); + break; + case GITS_CMD_INV: + ret = vgic_its_cmd_handle_inv(kvm, its, its_cmd); + break; + case GITS_CMD_INVALL: + ret = vgic_its_cmd_handle_invall(kvm, its, its_cmd); + break; + case GITS_CMD_SYNC: + /* we ignore this command: we are in sync all of the time */ + ret = 0; + break; + } + mutex_unlock(&its->its_lock); + + return ret; +} + +static u64 vgic_sanitise_its_baser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GITS_BASER_SHAREABILITY_MASK, + GITS_BASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GITS_BASER_INNER_CACHEABILITY_MASK, + GITS_BASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GITS_BASER_OUTER_CACHEABILITY_MASK, + GITS_BASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + /* We support only one (ITS) page size: 64K */ + reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K; + + return reg; +} + +static u64 vgic_sanitise_its_cbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GITS_CBASER_SHAREABILITY_MASK, + GITS_CBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GITS_CBASER_INNER_CACHEABILITY_MASK, + GITS_CBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GITS_CBASER_OUTER_CACHEABILITY_MASK, + GITS_CBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + /* Sanitise the physical address to be 64k aligned. */ + reg &= ~GENMASK_ULL(15, 12); + + return reg; +} + +static unsigned long vgic_mmio_read_its_cbaser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->cbaser, addr & 7, len); +} + +static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + /* When GITS_CTLR.Enable is 1, this register is RO. */ + if (its->enabled) + return; + + mutex_lock(&its->cmd_lock); + its->cbaser = update_64bit_reg(its->cbaser, addr & 7, len, val); + its->cbaser = vgic_sanitise_its_cbaser(its->cbaser); + its->creadr = 0; + /* + * CWRITER is architecturally UNKNOWN on reset, but we need to reset + * it to CREADR to make sure we start with an empty command buffer. + */ + its->cwriter = its->creadr; + mutex_unlock(&its->cmd_lock); +} + +#define ITS_CMD_BUFFER_SIZE(baser) ((((baser) & 0xff) + 1) << 12) +#define ITS_CMD_SIZE 32 +#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5)) + +/* Must be called with the cmd_lock held. 
*/ +static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) +{ + gpa_t cbaser; + u64 cmd_buf[4]; + + /* Commands are only processed when the ITS is enabled. */ + if (!its->enabled) + return; + + cbaser = GITS_CBASER_ADDRESS(its->cbaser); + + while (its->cwriter != its->creadr) { + int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr, + cmd_buf, ITS_CMD_SIZE); + /* + * If kvm_read_guest() fails, this could be due to the guest + * programming a bogus value in CBASER or something else going + * wrong from which we cannot easily recover. + * According to section 6.3.2 in the GICv3 spec we can just + * ignore that command then. + */ + if (!ret) + vgic_its_handle_command(kvm, its, cmd_buf); + + its->creadr += ITS_CMD_SIZE; + if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser)) + its->creadr = 0; + } +} + +/* + * By writing to CWRITER the guest announces new commands to be processed. + * To avoid any races in the first place, we take the its_cmd lock, which + * protects our ring buffer variables, so that there is only one user + * per ITS handling commands at a given time. + */ +static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u64 reg; + + if (!its) + return; + + mutex_lock(&its->cmd_lock); + + reg = update_64bit_reg(its->cwriter, addr & 7, len, val); + reg = ITS_CMD_OFFSET(reg); + if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + mutex_unlock(&its->cmd_lock); + return; + } + its->cwriter = reg; + + vgic_its_process_commands(kvm, its); + + mutex_unlock(&its->cmd_lock); +} + +static unsigned long vgic_mmio_read_its_cwriter(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->cwriter, addr & 0x7, len); +} + +static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->creadr, addr & 0x7, len); +} + +static int vgic_mmio_uaccess_write_its_creadr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 cmd_offset; + int ret = 0; + + mutex_lock(&its->cmd_lock); + + if (its->enabled) { + ret = -EBUSY; + goto out; + } + + cmd_offset = ITS_CMD_OFFSET(val); + if (cmd_offset >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + ret = -EINVAL; + goto out; + } + + its->creadr = cmd_offset; +out: + mutex_unlock(&its->cmd_lock); + return ret; +} + +#define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7) +static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u64 reg; + + switch (BASER_INDEX(addr)) { + case 0: + reg = its->baser_device_table; + break; + case 1: + reg = its->baser_coll_table; + break; + default: + reg = 0; + break; + } + + return extract_bytes(reg, addr & 7, len); +} + +#define GITS_BASER_RO_MASK (GENMASK_ULL(52, 48) | GENMASK_ULL(58, 56)) +static void vgic_mmio_write_its_baser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 entry_size, table_type; + u64 reg, *regptr, clearbits = 0; + + /* When GITS_CTLR.Enable is 1, we ignore write accesses. 
*/ + if (its->enabled) + return; + + switch (BASER_INDEX(addr)) { + case 0: + regptr = &its->baser_device_table; + entry_size = abi->dte_esz; + table_type = GITS_BASER_TYPE_DEVICE; + break; + case 1: + regptr = &its->baser_coll_table; + entry_size = abi->cte_esz; + table_type = GITS_BASER_TYPE_COLLECTION; + clearbits = GITS_BASER_INDIRECT; + break; + default: + return; + } + + reg = update_64bit_reg(*regptr, addr & 7, len, val); + reg &= ~GITS_BASER_RO_MASK; + reg &= ~clearbits; + + reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT; + reg |= table_type << GITS_BASER_TYPE_SHIFT; + reg = vgic_sanitise_its_baser(reg); + + *regptr = reg; + + if (!(reg & GITS_BASER_VALID)) { + /* Take the its_lock to prevent a race with a save/restore */ + mutex_lock(&its->its_lock); + switch (table_type) { + case GITS_BASER_TYPE_DEVICE: + vgic_its_free_device_list(kvm, its); + break; + case GITS_BASER_TYPE_COLLECTION: + vgic_its_free_collection_list(kvm, its); + break; + } + mutex_unlock(&its->its_lock); + } +} + +static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 reg = 0; + + mutex_lock(&its->cmd_lock); + if (its->creadr == its->cwriter) + reg |= GITS_CTLR_QUIESCENT; + if (its->enabled) + reg |= GITS_CTLR_ENABLE; + mutex_unlock(&its->cmd_lock); + + return reg; +} + +static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + mutex_lock(&its->cmd_lock); + + /* + * It is UNPREDICTABLE to enable the ITS if any of the CBASER or + * device/collection BASER are invalid + */ + if (!its->enabled && (val & GITS_CTLR_ENABLE) && + (!(its->baser_device_table & GITS_BASER_VALID) || + !(its->baser_coll_table & GITS_BASER_VALID) || + !(its->cbaser & GITS_CBASER_VALID))) + goto out; + + its->enabled = !!(val & GITS_CTLR_ENABLE); + if (!its->enabled) + vgic_its_invalidate_cache(kvm); + + /* + * Try to process any pending commands. This function bails out early + * if the ITS is disabled or no commands have been queued. 
+ */ + vgic_its_process_commands(kvm, its); + +out: + mutex_unlock(&its->cmd_lock); +} + +#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \ +{ \ + .reg_offset = off, \ + .len = length, \ + .access_flags = acc, \ + .its_read = rd, \ + .its_write = wr, \ +} + +#define REGISTER_ITS_DESC_UACCESS(off, rd, wr, uwr, length, acc)\ +{ \ + .reg_offset = off, \ + .len = length, \ + .access_flags = acc, \ + .its_read = rd, \ + .its_write = wr, \ + .uaccess_its_write = uwr, \ +} + +static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, unsigned long val) +{ + /* Ignore */ +} + +static struct vgic_register_region its_registers[] = { + REGISTER_ITS_DESC(GITS_CTLR, + vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4, + VGIC_ACCESS_32bit), + REGISTER_ITS_DESC_UACCESS(GITS_IIDR, + vgic_mmio_read_its_iidr, its_mmio_write_wi, + vgic_mmio_uaccess_write_its_iidr, 4, + VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_TYPER, + vgic_mmio_read_its_typer, its_mmio_write_wi, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_CBASER, + vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_CWRITER, + vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC_UACCESS(GITS_CREADR, + vgic_mmio_read_its_creadr, its_mmio_write_wi, + vgic_mmio_uaccess_write_its_creadr, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_BASER, + vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_IDREGS_BASE, + vgic_mmio_read_its_idregs, its_mmio_write_wi, 0x30, + VGIC_ACCESS_32bit), +}; + +/* This is called on setting the LPI enable bit in the redistributor. 
*/ +void vgic_enable_lpis(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.vgic_cpu.pendbaser & GICR_PENDBASER_PTZ)) + its_sync_lpi_pending_table(vcpu); +} + +static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its, + u64 addr) +{ + struct vgic_io_device *iodev = &its->iodev; + int ret; + + mutex_lock(&kvm->slots_lock); + if (!IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) { + ret = -EBUSY; + goto out; + } + + its->vgic_its_base = addr; + iodev->regions = its_registers; + iodev->nr_regions = ARRAY_SIZE(its_registers); + kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops); + + iodev->base_addr = its->vgic_its_base; + iodev->iodev_type = IODEV_ITS; + iodev->its = its; + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr, + KVM_VGIC_V3_ITS_SIZE, &iodev->dev); +out: + mutex_unlock(&kvm->slots_lock); + + return ret; +} + +/* Default is 16 cached LPIs per vcpu */ +#define LPI_DEFAULT_PCPU_CACHE_SIZE 16 + +void vgic_lpi_translation_cache_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + unsigned int sz; + int i; + + if (!list_empty(&dist->lpi_translation_cache)) + return; + + sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE; + + for (i = 0; i < sz; i++) { + struct vgic_translation_cache_entry *cte; + + /* An allocation failure is not fatal */ + cte = kzalloc(sizeof(*cte), GFP_KERNEL); + if (WARN_ON(!cte)) + break; + + INIT_LIST_HEAD(&cte->entry); + list_add(&cte->entry, &dist->lpi_translation_cache); + } +} + +void vgic_lpi_translation_cache_destroy(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_translation_cache_entry *cte, *tmp; + + vgic_its_invalidate_cache(kvm); + + list_for_each_entry_safe(cte, tmp, + &dist->lpi_translation_cache, entry) { + list_del(&cte->entry); + kfree(cte); + } +} + +#define INITIAL_BASER_VALUE \ + (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) | \ + GITS_BASER_PAGE_SIZE_64K) + +#define INITIAL_PROPBASER_VALUE \ + (GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)) + +static int vgic_its_create(struct kvm_device *dev, u32 type) +{ + struct vgic_its *its; + + if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) + return -ENODEV; + + its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL); + if (!its) + return -ENOMEM; + + if (vgic_initialized(dev->kvm)) { + int ret = vgic_v4_init(dev->kvm); + if (ret < 0) { + kfree(its); + return ret; + } + + vgic_lpi_translation_cache_init(dev->kvm); + } + + mutex_init(&its->its_lock); + mutex_init(&its->cmd_lock); + + its->vgic_its_base = VGIC_ADDR_UNDEF; + + INIT_LIST_HEAD(&its->device_list); + INIT_LIST_HEAD(&its->collection_list); + + dev->kvm->arch.vgic.msis_require_devid = true; + dev->kvm->arch.vgic.has_its = true; + its->enabled = false; + its->dev = dev; + + its->baser_device_table = INITIAL_BASER_VALUE | + ((u64)GITS_BASER_TYPE_DEVICE << GITS_BASER_TYPE_SHIFT); + its->baser_coll_table = INITIAL_BASER_VALUE | + ((u64)GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT); + dev->kvm->arch.vgic.propbaser = INITIAL_PROPBASER_VALUE; + + dev->private = its; + + return vgic_its_set_abi(its, NR_ITS_ABIS - 1); +} + +static void vgic_its_destroy(struct kvm_device *kvm_dev) +{ + struct kvm *kvm = kvm_dev->kvm; + struct vgic_its *its = kvm_dev->private; + + mutex_lock(&its->its_lock); + + vgic_its_free_device_list(kvm, its); + 
vgic_its_free_collection_list(kvm, its); + + mutex_unlock(&its->its_lock); + kfree(its); + kfree(kvm_dev);/* alloc by kvm_ioctl_create_device, free by .destroy */ +} + +static int vgic_its_has_attr_regs(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + const struct vgic_register_region *region; + gpa_t offset = attr->attr; + int align; + + align = (offset < GITS_TYPER) || (offset >= GITS_PIDR4) ? 0x3 : 0x7; + + if (offset & align) + return -EINVAL; + + region = vgic_find_mmio_region(its_registers, + ARRAY_SIZE(its_registers), + offset); + if (!region) + return -ENXIO; + + return 0; +} + +static int vgic_its_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u64 *reg, bool is_write) +{ + const struct vgic_register_region *region; + struct vgic_its *its; + gpa_t addr, offset; + unsigned int len; + int align, ret = 0; + + its = dev->private; + offset = attr->attr; + + /* + * Although the spec supports upper/lower 32-bit accesses to + * 64-bit ITS registers, the userspace ABI requires 64-bit + * accesses to all 64-bit wide registers. We therefore only + * support 32-bit accesses to GITS_CTLR, GITS_IIDR and GITS ID + * registers + */ + if ((offset < GITS_TYPER) || (offset >= GITS_PIDR4)) + align = 0x3; + else + align = 0x7; + + if (offset & align) + return -EINVAL; + + mutex_lock(&dev->kvm->lock); + + if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) { + ret = -ENXIO; + goto out; + } + + region = vgic_find_mmio_region(its_registers, + ARRAY_SIZE(its_registers), + offset); + if (!region) { + ret = -ENXIO; + goto out; + } + + if (!lock_all_vcpus(dev->kvm)) { + ret = -EBUSY; + goto out; + } + + addr = its->vgic_its_base + offset; + + len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4; + + if (is_write) { + if (region->uaccess_its_write) + ret = region->uaccess_its_write(dev->kvm, its, addr, + len, *reg); + else + region->its_write(dev->kvm, its, addr, len, *reg); + } else { + *reg = region->its_read(dev->kvm, its, addr, len); + } + unlock_all_vcpus(dev->kvm); +out: + mutex_unlock(&dev->kvm->lock); + return ret; +} + +static u32 compute_next_devid_offset(struct list_head *h, + struct its_device *dev) +{ + struct its_device *next; + u32 next_offset; + + if (list_is_last(&dev->dev_list, h)) + return 0; + next = list_next_entry(dev, dev_list); + next_offset = next->device_id - dev->device_id; + + return min_t(u32, next_offset, VITS_DTE_MAX_DEVID_OFFSET); +} + +static u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite) +{ + struct its_ite *next; + u32 next_offset; + + if (list_is_last(&ite->ite_list, h)) + return 0; + next = list_next_entry(ite, ite_list); + next_offset = next->event_id - ite->event_id; + + return min_t(u32, next_offset, VITS_ITE_MAX_EVENTID_OFFSET); +} + +/** + * entry_fn_t - Callback called on a table entry restore path + * @its: its handle + * @id: id of the entry + * @entry: pointer to the entry + * @opaque: pointer to an opaque data + * + * Return: < 0 on error, 0 if last element was identified, id offset to next + * element otherwise + */ +typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry, + void *opaque); + +/** + * scan_its_table - Scan a contiguous table in guest RAM and applies a function + * to each entry + * + * @its: its handle + * @base: base gpa of the table + * @size: size of the table in bytes + * @esz: entry size in bytes + * @start_id: the ID of the first entry in the table + * (non zero for 2d level tables) + * @fn: function to apply on each entry + * + * Return: < 0 on error, 0 if last element 
was identified, 1 otherwise + * (the last element may not be found on second level tables) + */ +static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, + int start_id, entry_fn_t fn, void *opaque) +{ + struct kvm *kvm = its->dev->kvm; + unsigned long len = size; + int id = start_id; + gpa_t gpa = base; + char entry[ESZ_MAX]; + int ret; + + memset(entry, 0, esz); + + while (len > 0) { + int next_offset; + size_t byte_offset; + + ret = kvm_read_guest_lock(kvm, gpa, entry, esz); + if (ret) + return ret; + + next_offset = fn(its, id, entry, opaque); + if (next_offset <= 0) + return next_offset; + + byte_offset = next_offset * esz; + id += next_offset; + gpa += byte_offset; + len -= byte_offset; + } + return 1; +} + +/** + * vgic_its_save_ite - Save an interrupt translation entry at @gpa + */ +static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, + struct its_ite *ite, gpa_t gpa, int ite_esz) +{ + struct kvm *kvm = its->dev->kvm; + u32 next_offset; + u64 val; + + next_offset = compute_next_eventid_offset(&dev->itt_head, ite); + val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) | + ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) | + ite->collection->collection_id; + val = cpu_to_le64(val); + return kvm_write_guest_lock(kvm, gpa, &val, ite_esz); +} + +/** + * vgic_its_restore_ite - restore an interrupt translation entry + * @event_id: id used for indexing + * @ptr: pointer to the ITE entry + * @opaque: pointer to the its_device + */ +static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, + void *ptr, void *opaque) +{ + struct its_device *dev = (struct its_device *)opaque; + struct its_collection *collection; + struct kvm *kvm = its->dev->kvm; + struct kvm_vcpu *vcpu = NULL; + u64 val; + u64 *p = (u64 *)ptr; + struct vgic_irq *irq; + u32 coll_id, lpi_id; + struct its_ite *ite; + u32 offset; + + val = *p; + + val = le64_to_cpu(val); + + coll_id = val & KVM_ITS_ITE_ICID_MASK; + lpi_id = (val & KVM_ITS_ITE_PINTID_MASK) >> KVM_ITS_ITE_PINTID_SHIFT; + + if (!lpi_id) + return 1; /* invalid entry, no choice but to scan next entry */ + + if (lpi_id < VGIC_MIN_LPI) + return -EINVAL; + + offset = val >> KVM_ITS_ITE_NEXT_SHIFT; + if (event_id + offset >= BIT_ULL(dev->num_eventid_bits)) + return -EINVAL; + + collection = find_collection(its, coll_id); + if (!collection) + return -EINVAL; + + ite = vgic_its_alloc_ite(dev, collection, event_id); + if (IS_ERR(ite)) + return PTR_ERR(ite); + + if (its_is_collection_mapped(collection)) + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + irq = vgic_add_lpi(kvm, lpi_id, vcpu); + if (IS_ERR(irq)) + return PTR_ERR(irq); + ite->irq = irq; + + return offset; +} + +static int vgic_its_ite_cmp(void *priv, struct list_head *a, + struct list_head *b) +{ + struct its_ite *itea = container_of(a, struct its_ite, ite_list); + struct its_ite *iteb = container_of(b, struct its_ite, ite_list); + + if (itea->event_id < iteb->event_id) + return -1; + else + return 1; +} + +static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + gpa_t base = device->itt_addr; + struct its_ite *ite; + int ret; + int ite_esz = abi->ite_esz; + + list_sort(NULL, &device->itt_head, vgic_its_ite_cmp); + + list_for_each_entry(ite, &device->itt_head, ite_list) { + gpa_t gpa = base + ite->event_id * ite_esz; + + /* + * If an LPI carries the HW bit, this means that this + * interrupt is controlled by GICv4, and we do not + * have direct access to that state. 
Let's simply fail + * the save operation... + */ + if (ite->irq->hw) + return -EACCES; + + ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz); + if (ret) + return ret; + } + return 0; +} + +/** + * vgic_its_restore_itt - restore the ITT of a device + * + * @its: its handle + * @dev: device handle + * + * Return 0 on success, < 0 on error + */ +static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + gpa_t base = dev->itt_addr; + int ret; + int ite_esz = abi->ite_esz; + size_t max_size = BIT_ULL(dev->num_eventid_bits) * ite_esz; + + ret = scan_its_table(its, base, max_size, ite_esz, 0, + vgic_its_restore_ite, dev); + + /* scan_its_table returns +1 if all ITEs are invalid */ + if (ret > 0) + ret = 0; + + return ret; +} + +/** + * vgic_its_save_dte - Save a device table entry at a given GPA + * + * @its: ITS handle + * @dev: ITS device + * @ptr: GPA + */ +static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, + gpa_t ptr, int dte_esz) +{ + struct kvm *kvm = its->dev->kvm; + u64 val, itt_addr_field; + u32 next_offset; + + itt_addr_field = dev->itt_addr >> 8; + next_offset = compute_next_devid_offset(&its->device_list, dev); + val = (1ULL << KVM_ITS_DTE_VALID_SHIFT | + ((u64)next_offset << KVM_ITS_DTE_NEXT_SHIFT) | + (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) | + (dev->num_eventid_bits - 1)); + val = cpu_to_le64(val); + return kvm_write_guest_lock(kvm, ptr, &val, dte_esz); +} + +/** + * vgic_its_restore_dte - restore a device table entry + * + * @its: its handle + * @id: device id the DTE corresponds to + * @ptr: kernel VA where the 8 byte DTE is located + * @opaque: unused + * + * Return: < 0 on error, 0 if the dte is the last one, id offset to the + * next dte otherwise + */ +static int vgic_its_restore_dte(struct vgic_its *its, u32 id, + void *ptr, void *opaque) +{ + struct its_device *dev; + gpa_t itt_addr; + u8 num_eventid_bits; + u64 entry = *(u64 *)ptr; + bool valid; + u32 offset; + int ret; + + entry = le64_to_cpu(entry); + + valid = entry >> KVM_ITS_DTE_VALID_SHIFT; + num_eventid_bits = (entry & KVM_ITS_DTE_SIZE_MASK) + 1; + itt_addr = ((entry & KVM_ITS_DTE_ITTADDR_MASK) + >> KVM_ITS_DTE_ITTADDR_SHIFT) << 8; + + if (!valid) + return 1; + + /* dte entry is valid */ + offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT; + + dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = vgic_its_restore_itt(its, dev); + if (ret) { + vgic_its_free_device(its->dev->kvm, dev); + return ret; + } + + return offset; +} + +static int vgic_its_device_cmp(void *priv, struct list_head *a, + struct list_head *b) +{ + struct its_device *deva = container_of(a, struct its_device, dev_list); + struct its_device *devb = container_of(b, struct its_device, dev_list); + + if (deva->device_id < devb->device_id) + return -1; + else + return 1; +} + +/** + * vgic_its_save_device_tables - Save the device table and all ITT + * into guest RAM + * + * L1/L2 handling is hidden by vgic_its_check_id() helper which directly + * returns the GPA of the device entry + */ +static int vgic_its_save_device_tables(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_device_table; + struct its_device *dev; + int dte_esz = abi->dte_esz; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + list_sort(NULL, &its->device_list, vgic_its_device_cmp); + + list_for_each_entry(dev, &its->device_list, 
dev_list) { + int ret; + gpa_t eaddr; + + if (!vgic_its_check_id(its, baser, + dev->device_id, &eaddr)) + return -EINVAL; + + ret = vgic_its_save_itt(its, dev); + if (ret) + return ret; + + ret = vgic_its_save_dte(its, dev, eaddr, dte_esz); + if (ret) + return ret; + } + return 0; +} + +/** + * handle_l1_dte - callback used for L1 device table entries (2 stage case) + * + * @its: its handle + * @id: index of the entry in the L1 table + * @addr: kernel VA + * @opaque: unused + * + * L1 table entries are scanned by steps of 1 entry + * Return < 0 if error, 0 if last dte was found when scanning the L2 + * table, +1 otherwise (meaning next L1 entry must be scanned) + */ +static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr, + void *opaque) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + int l2_start_id = id * (SZ_64K / abi->dte_esz); + u64 entry = *(u64 *)addr; + int dte_esz = abi->dte_esz; + gpa_t gpa; + int ret; + + entry = le64_to_cpu(entry); + + if (!(entry & KVM_ITS_L1E_VALID_MASK)) + return 1; + + gpa = entry & KVM_ITS_L1E_ADDR_MASK; + + ret = scan_its_table(its, gpa, SZ_64K, dte_esz, + l2_start_id, vgic_its_restore_dte, NULL); + + return ret; +} + +/** + * vgic_its_restore_device_tables - Restore the device table and all ITT + * from guest RAM to internal data structs + */ +static int vgic_its_restore_device_tables(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_device_table; + int l1_esz, ret; + int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + gpa_t l1_gpa; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + l1_gpa = GITS_BASER_ADDR_48_to_52(baser); + + if (baser & GITS_BASER_INDIRECT) { + l1_esz = GITS_LVL1_ENTRY_SIZE; + ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0, + handle_l1_dte, NULL); + } else { + l1_esz = abi->dte_esz; + ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0, + vgic_its_restore_dte, NULL); + } + + /* scan_its_table returns +1 if all entries are invalid */ + if (ret > 0) + ret = 0; + + return ret; +} + +static int vgic_its_save_cte(struct vgic_its *its, + struct its_collection *collection, + gpa_t gpa, int esz) +{ + u64 val; + + val = (1ULL << KVM_ITS_CTE_VALID_SHIFT | + ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) | + collection->collection_id); + val = cpu_to_le64(val); + return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz); +} + +static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) +{ + struct its_collection *collection; + struct kvm *kvm = its->dev->kvm; + u32 target_addr, coll_id; + u64 val; + int ret; + + BUG_ON(esz > sizeof(val)); + ret = kvm_read_guest_lock(kvm, gpa, &val, esz); + if (ret) + return ret; + val = le64_to_cpu(val); + if (!(val & KVM_ITS_CTE_VALID_MASK)) + return 0; + + target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT); + coll_id = val & KVM_ITS_CTE_ICID_MASK; + + if (target_addr != COLLECTION_NOT_MAPPED && + target_addr >= atomic_read(&kvm->online_vcpus)) + return -EINVAL; + + collection = find_collection(its, coll_id); + if (collection) + return -EEXIST; + ret = vgic_its_alloc_collection(its, &collection, coll_id); + if (ret) + return ret; + collection->target_addr = target_addr; + return 1; +} + +/** + * vgic_its_save_collection_table - Save the collection table into + * guest RAM + */ +static int vgic_its_save_collection_table(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_coll_table; + gpa_t gpa = 
GITS_BASER_ADDR_48_to_52(baser); + struct its_collection *collection; + u64 val; + size_t max_size, filled = 0; + int ret, cte_esz = abi->cte_esz; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + + list_for_each_entry(collection, &its->collection_list, coll_list) { + ret = vgic_its_save_cte(its, collection, gpa, cte_esz); + if (ret) + return ret; + gpa += cte_esz; + filled += cte_esz; + } + + if (filled == max_size) + return 0; + + /* + * table is not fully filled, add a last dummy element + * with valid bit unset + */ + val = 0; + BUG_ON(cte_esz > sizeof(val)); + ret = kvm_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz); + return ret; +} + +/** + * vgic_its_restore_collection_table - reads the collection table + * in guest memory and restores the ITS internal state. Requires the + * BASER registers to be restored before. + */ +static int vgic_its_restore_collection_table(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_coll_table; + int cte_esz = abi->cte_esz; + size_t max_size, read = 0; + gpa_t gpa; + int ret; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + gpa = GITS_BASER_ADDR_48_to_52(baser); + + max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + + while (read < max_size) { + ret = vgic_its_restore_cte(its, gpa, cte_esz); + if (ret <= 0) + break; + gpa += cte_esz; + read += cte_esz; + } + + if (ret > 0) + return 0; + + return ret; +} + +/** + * vgic_its_save_tables_v0 - Save the ITS tables into guest ARM + * according to v0 ABI + */ +static int vgic_its_save_tables_v0(struct vgic_its *its) +{ + int ret; + + ret = vgic_its_save_device_tables(its); + if (ret) + return ret; + + return vgic_its_save_collection_table(its); +} + +/** + * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM + * to internal data structs according to V0 ABI + * + */ +static int vgic_its_restore_tables_v0(struct vgic_its *its) +{ + int ret; + + ret = vgic_its_restore_collection_table(its); + if (ret) + return ret; + + return vgic_its_restore_device_tables(its); +} + +static int vgic_its_commit_v0(struct vgic_its *its) +{ + const struct vgic_its_abi *abi; + + abi = vgic_its_get_abi(its); + its->baser_coll_table &= ~GITS_BASER_ENTRY_SIZE_MASK; + its->baser_device_table &= ~GITS_BASER_ENTRY_SIZE_MASK; + + its->baser_coll_table |= (GIC_ENCODE_SZ(abi->cte_esz, 5) + << GITS_BASER_ENTRY_SIZE_SHIFT); + + its->baser_device_table |= (GIC_ENCODE_SZ(abi->dte_esz, 5) + << GITS_BASER_ENTRY_SIZE_SHIFT); + return 0; +} + +static void vgic_its_reset(struct kvm *kvm, struct vgic_its *its) +{ + /* We need to keep the ABI specific field values */ + its->baser_coll_table &= ~GITS_BASER_VALID; + its->baser_device_table &= ~GITS_BASER_VALID; + its->cbaser = 0; + its->creadr = 0; + its->cwriter = 0; + its->enabled = 0; + vgic_its_free_device_list(kvm, its); + vgic_its_free_collection_list(kvm, its); +} + +static int vgic_its_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_ITS_ADDR_TYPE: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + case KVM_DEV_ARM_ITS_CTRL_RESET: + return 0; + case KVM_DEV_ARM_ITS_SAVE_TABLES: + return 0; + case KVM_DEV_ARM_ITS_RESTORE_TABLES: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: + return vgic_its_has_attr_regs(dev, attr); + } + return -ENXIO; +} + +static 
int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + int ret = 0; + + if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */ + return 0; + + mutex_lock(&kvm->lock); + mutex_lock(&its->its_lock); + + if (!lock_all_vcpus(kvm)) { + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->lock); + return -EBUSY; + } + + switch (attr) { + case KVM_DEV_ARM_ITS_CTRL_RESET: + vgic_its_reset(kvm, its); + break; + case KVM_DEV_ARM_ITS_SAVE_TABLES: + ret = abi->save_tables(its); + break; + case KVM_DEV_ARM_ITS_RESTORE_TABLES: + ret = abi->restore_tables(its); + break; + } + + unlock_all_vcpus(kvm); + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->lock); + return ret; +} + +static int vgic_its_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct vgic_its *its = dev->private; + int ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + unsigned long type = (unsigned long)attr->attr; + u64 addr; + + if (type != KVM_VGIC_ITS_ADDR_TYPE) + return -ENODEV; + + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base, + addr, SZ_64K); + if (ret) + return ret; + + return vgic_register_its_iodev(dev->kvm, its, addr); + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: + return vgic_its_ctrl(dev->kvm, its, attr->attr); + case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_its_attr_regs_access(dev, attr, ®, true); + } + } + return -ENXIO; +} + +static int vgic_its_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + struct vgic_its *its = dev->private; + u64 addr = its->vgic_its_base; + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + unsigned long type = (unsigned long)attr->attr; + + if (type != KVM_VGIC_ITS_ADDR_TYPE) + return -ENODEV; + + if (copy_to_user(uaddr, &addr, sizeof(addr))) + return -EFAULT; + break; + } + case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + int ret; + + ret = vgic_its_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + return put_user(reg, uaddr); + } + default: + return -ENXIO; + } + + return 0; +} + +static struct kvm_device_ops kvm_arm_vgic_its_ops = { + .name = "kvm-arm-vgic-its", + .create = vgic_its_create, + .destroy = vgic_its_destroy, + .set_attr = vgic_its_set_attr, + .get_attr = vgic_its_get_attr, + .has_attr = vgic_its_has_attr, +}; + +int kvm_vgic_register_its_device(void) +{ + return kvm_register_device_ops(&kvm_arm_vgic_its_ops, + KVM_DEV_TYPE_ARM_VGIC_ITS); +} diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c new file mode 100644 index 000000000000..44419679f91a --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -0,0 +1,741 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VGIC: KVM DEVICE API + * + * Copyright (C) 2015 ARM Ltd. 
+ * Author: Marc Zyngier <marc.zyngier@arm.com> + */ +#include <linux/kvm_host.h> +#include <kvm/arm_vgic.h> +#include <linux/uaccess.h> +#include <asm/kvm_mmu.h> +#include <asm/cputype.h> +#include "vgic.h" + +/* common helpers */ + +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, + phys_addr_t addr, phys_addr_t alignment) +{ + if (addr & ~kvm_phys_mask(kvm)) + return -E2BIG; + + if (!IS_ALIGNED(addr, alignment)) + return -EINVAL; + + if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) + return -EEXIST; + + return 0; +} + +static int vgic_check_type(struct kvm *kvm, int type_needed) +{ + if (kvm->arch.vgic.vgic_model != type_needed) + return -ENODEV; + else + return 0; +} + +/** + * kvm_vgic_addr - set or get vgic VM base addresses + * @kvm: pointer to the vm struct + * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX + * @addr: pointer to address value + * @write: if true set the address in the VM address space, if false read the + * address + * + * Set or get the vgic base addresses for the distributor and the virtual CPU + * interface in the VM physical address space. These addresses are properties + * of the emulated core/SoC and therefore user space initially knows this + * information. + * Check them for sanity (alignment, double assignment). We can't check for + * overlapping regions in case of a virtual GICv3 here, since we don't know + * the number of VCPUs yet, so we defer this check to map_resources(). + */ +int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) +{ + int r = 0; + struct vgic_dist *vgic = &kvm->arch.vgic; + phys_addr_t *addr_ptr, alignment; + u64 undef_value = VGIC_ADDR_UNDEF; + + mutex_lock(&kvm->lock); + switch (type) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + addr_ptr = &vgic->vgic_dist_base; + alignment = SZ_4K; + break; + case KVM_VGIC_V2_ADDR_TYPE_CPU: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + addr_ptr = &vgic->vgic_cpu_base; + alignment = SZ_4K; + break; + case KVM_VGIC_V3_ADDR_TYPE_DIST: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); + addr_ptr = &vgic->vgic_dist_base; + alignment = SZ_64K; + break; + case KVM_VGIC_V3_ADDR_TYPE_REDIST: { + struct vgic_redist_region *rdreg; + + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); + if (r) + break; + if (write) { + r = vgic_v3_set_redist_base(kvm, 0, *addr, 0); + goto out; + } + rdreg = list_first_entry(&vgic->rd_regions, + struct vgic_redist_region, list); + if (!rdreg) + addr_ptr = &undef_value; + else + addr_ptr = &rdreg->base; + break; + } + case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION: + { + struct vgic_redist_region *rdreg; + u8 index; + + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); + if (r) + break; + + index = *addr & KVM_VGIC_V3_RDIST_INDEX_MASK; + + if (write) { + gpa_t base = *addr & KVM_VGIC_V3_RDIST_BASE_MASK; + u32 count = (*addr & KVM_VGIC_V3_RDIST_COUNT_MASK) + >> KVM_VGIC_V3_RDIST_COUNT_SHIFT; + u8 flags = (*addr & KVM_VGIC_V3_RDIST_FLAGS_MASK) + >> KVM_VGIC_V3_RDIST_FLAGS_SHIFT; + + if (!count || flags) + r = -EINVAL; + else + r = vgic_v3_set_redist_base(kvm, index, + base, count); + goto out; + } + + rdreg = vgic_v3_rdist_region_from_index(kvm, index); + if (!rdreg) { + r = -ENOENT; + goto out; + } + + *addr = index; + *addr |= rdreg->base; + *addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT; + goto out; + } + default: + r = -ENODEV; + } + + if (r) + goto out; + + if (write) { + r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment); + if (!r) + *addr_ptr = *addr; + } else { 
+ *addr = *addr_ptr; + } + +out: + mutex_unlock(&kvm->lock); + return r; +} + +static int vgic_set_common_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int r; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 addr; + unsigned long type = (unsigned long)attr->attr; + + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + r = kvm_vgic_addr(dev->kvm, type, &addr, true); + return (r == -ENODEV) ? -ENXIO : r; + } + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 val; + int ret = 0; + + if (get_user(val, uaddr)) + return -EFAULT; + + /* + * We require: + * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs + * - at most 1024 interrupts + * - a multiple of 32 interrupts + */ + if (val < (VGIC_NR_PRIVATE_IRQS + 32) || + val > VGIC_MAX_RESERVED || + (val & 31)) + return -EINVAL; + + mutex_lock(&dev->kvm->lock); + + if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis) + ret = -EBUSY; + else + dev->kvm->arch.vgic.nr_spis = + val - VGIC_NR_PRIVATE_IRQS; + + mutex_unlock(&dev->kvm->lock); + + return ret; + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: { + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + mutex_lock(&dev->kvm->lock); + r = vgic_init(dev->kvm); + mutex_unlock(&dev->kvm->lock); + return r; + } + break; + } + } + + return -ENXIO; +} + +static int vgic_get_common_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int r = -ENXIO; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 addr; + unsigned long type = (unsigned long)attr->attr; + + r = kvm_vgic_addr(dev->kvm, type, &addr, false); + if (r) + return (r == -ENODEV) ? 
-ENXIO : r; + + if (copy_to_user(uaddr, &addr, sizeof(addr))) + return -EFAULT; + break; + } + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + + r = put_user(dev->kvm->arch.vgic.nr_spis + + VGIC_NR_PRIVATE_IRQS, uaddr); + break; + } + } + + return r; +} + +static int vgic_create(struct kvm_device *dev, u32 type) +{ + return kvm_vgic_create(dev->kvm, type); +} + +static void vgic_destroy(struct kvm_device *dev) +{ + kfree(dev); +} + +int kvm_register_vgic_device(unsigned long type) +{ + int ret = -ENODEV; + + switch (type) { + case KVM_DEV_TYPE_ARM_VGIC_V2: + ret = kvm_register_device_ops(&kvm_arm_vgic_v2_ops, + KVM_DEV_TYPE_ARM_VGIC_V2); + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: + ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops, + KVM_DEV_TYPE_ARM_VGIC_V3); + + if (ret) + break; + ret = kvm_vgic_register_its_device(); + break; + } + + return ret; +} + +int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr) +{ + int cpuid; + + cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> + KVM_DEV_ARM_VGIC_CPUID_SHIFT; + + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) + return -EINVAL; + + reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid); + reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + + return 0; +} + +/* unlocks vcpus from @vcpu_lock_idx and smaller */ +static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) +{ + struct kvm_vcpu *tmp_vcpu; + + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { + tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); + mutex_unlock(&tmp_vcpu->mutex); + } +} + +void unlock_all_vcpus(struct kvm *kvm) +{ + unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); +} + +/* Returns true if all vcpus were locked, false otherwise */ +bool lock_all_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *tmp_vcpu; + int c; + + /* + * Any time a vcpu is run, vcpu_load is called which tries to grab the + * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure + * that no other VCPUs are run and fiddle with the vgic state while we + * access it. 
+ */ + kvm_for_each_vcpu(c, tmp_vcpu, kvm) { + if (!mutex_trylock(&tmp_vcpu->mutex)) { + unlock_vcpus(kvm, c - 1); + return false; + } + } + + return true; +} + +/** + * vgic_v2_attr_regs_access - allows user space to access VGIC v2 state + * + * @dev: kvm device handle + * @attr: kvm device attribute + * @reg: address the value is read or written + * @is_write: true if userspace is writing a register + */ +static int vgic_v2_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u32 *reg, bool is_write) +{ + struct vgic_reg_attr reg_attr; + gpa_t addr; + struct kvm_vcpu *vcpu; + int ret; + + ret = vgic_v2_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + mutex_lock(&dev->kvm->lock); + + ret = vgic_init(dev->kvm); + if (ret) + goto out; + + if (!lock_all_vcpus(dev->kvm)) { + ret = -EBUSY; + goto out; + } + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg); + break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg); + break; + default: + ret = -EINVAL; + break; + } + + unlock_all_vcpus(dev->kvm); +out: + mutex_unlock(&dev->kvm->lock); + return ret; +} + +static int vgic_v2_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_set_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_v2_attr_regs_access(dev, attr, ®, true); + } + } + + return -ENXIO; +} + +static int vgic_v2_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_get_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg = 0; + + ret = vgic_v2_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + return put_user(reg, uaddr); + } + } + + return -ENXIO; +} + +static int vgic_v2_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + case KVM_VGIC_V2_ADDR_TYPE_CPU: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + return vgic_v2_has_attr_regs(dev, attr); + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return 0; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + } + } + return -ENXIO; +} + +struct kvm_device_ops kvm_arm_vgic_v2_ops = { + .name = "kvm-arm-vgic-v2", + .create = vgic_create, + .destroy = vgic_destroy, + .set_attr = vgic_v2_set_attr, + .get_attr = vgic_v2_get_attr, + .has_attr = vgic_v2_has_attr, +}; + +int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr) +{ + unsigned long vgic_mpidr, mpidr_reg; + + /* + * For KVM_DEV_ARM_VGIC_GRP_DIST_REGS group, + * attr might not hold MPIDR. Hence assume vcpu0. 
+ */ + if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS) { + vgic_mpidr = (attr->attr & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) >> + KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT; + + mpidr_reg = VGIC_TO_MPIDR(vgic_mpidr); + reg_attr->vcpu = kvm_mpidr_to_vcpu(dev->kvm, mpidr_reg); + } else { + reg_attr->vcpu = kvm_get_vcpu(dev->kvm, 0); + } + + if (!reg_attr->vcpu) + return -EINVAL; + + reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + + return 0; +} + +/* + * vgic_v3_attr_regs_access - allows user space to access VGIC v3 state + * + * @dev: kvm device handle + * @attr: kvm device attribute + * @reg: address the value is read or written + * @is_write: true if userspace is writing a register + */ +static int vgic_v3_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u64 *reg, bool is_write) +{ + struct vgic_reg_attr reg_attr; + gpa_t addr; + struct kvm_vcpu *vcpu; + int ret; + u32 tmp32; + + ret = vgic_v3_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + mutex_lock(&dev->kvm->lock); + + if (unlikely(!vgic_initialized(dev->kvm))) { + ret = -EBUSY; + goto out; + } + + if (!lock_all_vcpus(dev->kvm)) { + ret = -EBUSY; + goto out; + } + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + if (is_write) + tmp32 = *reg; + + ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &tmp32); + if (!is_write) + *reg = tmp32; + break; + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + if (is_write) + tmp32 = *reg; + + ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &tmp32); + if (!is_write) + *reg = tmp32; + break; + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 regid; + + regid = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); + ret = vgic_v3_cpu_sysregs_uaccess(vcpu, is_write, + regid, reg); + break; + } + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + unsigned int info, intid; + + info = (attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> + KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT; + if (info == VGIC_LEVEL_INFO_LINE_LEVEL) { + intid = attr->attr & + KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK; + ret = vgic_v3_line_level_info_uaccess(vcpu, is_write, + intid, reg); + } else { + ret = -EINVAL; + } + break; + } + default: + ret = -EINVAL; + break; + } + + unlock_all_vcpus(dev->kvm); +out: + mutex_unlock(&dev->kvm->lock); + return ret; +} + +static int vgic_v3_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_set_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 tmp32; + u64 reg; + + if (get_user(tmp32, uaddr)) + return -EFAULT; + + reg = tmp32; + return vgic_v3_attr_regs_access(dev, attr, ®, true); + } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_v3_attr_regs_access(dev, attr, ®, true); + } + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u64 reg; + u32 tmp32; + + if (get_user(tmp32, uaddr)) + return -EFAULT; + + reg = tmp32; + return vgic_v3_attr_regs_access(dev, attr, ®, true); + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: { + int ret; + + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: + mutex_lock(&dev->kvm->lock); + + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + ret = 
vgic_v3_save_pending_tables(dev->kvm); + unlock_all_vcpus(dev->kvm); + mutex_unlock(&dev->kvm->lock); + return ret; + } + break; + } + } + return -ENXIO; +} + +static int vgic_v3_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_get_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u64 reg; + u32 tmp32; + + ret = vgic_v3_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + tmp32 = reg; + return put_user(tmp32, uaddr); + } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + + ret = vgic_v3_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + return put_user(reg, uaddr); + } + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u64 reg; + u32 tmp32; + + ret = vgic_v3_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + tmp32 = reg; + return put_user(tmp32, uaddr); + } + } + return -ENXIO; +} + +static int vgic_v3_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_V3_ADDR_TYPE_DIST: + case KVM_VGIC_V3_ADDR_TYPE_REDIST: + case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + return vgic_v3_has_attr_regs(dev, attr); + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return 0; + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + if (((attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> + KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) == + VGIC_LEVEL_INFO_LINE_LEVEL) + return 0; + break; + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: + return 0; + } + } + return -ENXIO; +} + +struct kvm_device_ops kvm_arm_vgic_v3_ops = { + .name = "kvm-arm-vgic-v3", + .create = vgic_create, + .destroy = vgic_destroy, + .set_attr = vgic_v3_set_attr, + .get_attr = vgic_v3_get_attr, + .has_attr = vgic_v3_has_attr, +}; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c new file mode 100644 index 000000000000..a016f07adc28 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VGICv2 MMIO handling functions + */ + +#include <linux/irqchip/arm-gic.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/nospec.h> + +#include <kvm/iodev.h> +#include <kvm/arm_vgic.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +/* + * The Revision field in the IIDR have the following meanings: + * + * Revision 1: Report GICv2 interrupts as group 0 instead of group 1 + * Revision 2: Interrupt groups are guest-configurable and signaled using + * their configured groups. + */ + +static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; + u32 value; + + switch (addr & 0x0c) { + case GIC_DIST_CTRL: + value = vgic->enabled ? 
GICD_ENABLE : 0; + break; + case GIC_DIST_CTR: + value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS; + value = (value >> 5) - 1; + value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; + break; + case GIC_DIST_IIDR: + value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) | + (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) | + (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT); + break; + default: + return 0; + } + + return value; +} + +static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + bool was_enabled = dist->enabled; + + switch (addr & 0x0c) { + case GIC_DIST_CTRL: + dist->enabled = val & GICD_ENABLE; + if (!was_enabled && dist->enabled) + vgic_kick_vcpus(vcpu->kvm); + break; + case GIC_DIST_CTR: + case GIC_DIST_IIDR: + /* Nothing to do */ + return; + } +} + +static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + switch (addr & 0x0c) { + case GIC_DIST_IIDR: + if (val != vgic_mmio_read_v2_misc(vcpu, addr, len)) + return -EINVAL; + + /* + * If we observe a write to GICD_IIDR we know that userspace + * has been updated and has had a chance to cope with older + * kernels (VGICv2 IIDR.Revision == 0) incorrectly reporting + * interrupts as group 1, and therefore we now allow groups to + * be user writable. Doing this by default would break + * migration from old kernels to new kernels with legacy + * userspace. + */ + vcpu->kvm->arch.vgic.v2_groups_user_writable = true; + return 0; + } + + vgic_mmio_write_v2_misc(vcpu, addr, len, val); + return 0; +} + +static int vgic_mmio_uaccess_write_v2_group(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + if (vcpu->kvm->arch.vgic.v2_groups_user_writable) + vgic_mmio_write_group(vcpu, addr, len, val); + + return 0; +} + +static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + int nr_vcpus = atomic_read(&source_vcpu->kvm->online_vcpus); + int intid = val & 0xf; + int targets = (val >> 16) & 0xff; + int mode = (val >> 24) & 0x03; + int c; + struct kvm_vcpu *vcpu; + unsigned long flags; + + switch (mode) { + case 0x0: /* as specified by targets */ + break; + case 0x1: + targets = (1U << nr_vcpus) - 1; /* all, ... 
*/ + targets &= ~(1U << source_vcpu->vcpu_id); /* but self */ + break; + case 0x2: /* this very vCPU only */ + targets = (1U << source_vcpu->vcpu_id); + break; + case 0x3: /* reserved */ + return; + } + + kvm_for_each_vcpu(c, vcpu, source_vcpu->kvm) { + struct vgic_irq *irq; + + if (!(targets & (1U << c))) + continue; + + irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + irq->source |= 1U << source_vcpu->vcpu_id; + + vgic_queue_irq_unlock(source_vcpu->kvm, irq, flags); + vgic_put_irq(source_vcpu->kvm, irq); + } +} + +static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + int i; + u64 val = 0; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + val |= (u64)irq->targets << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); + } + + return val; +} + +static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + u8 cpu_mask = GENMASK(atomic_read(&vcpu->kvm->online_vcpus) - 1, 0); + int i; + unsigned long flags; + + /* GICD_ITARGETSR[0-7] are read-only */ + if (intid < VGIC_NR_PRIVATE_IRQS) + return; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i); + int target; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + irq->targets = (val >> (i * 8)) & cpu_mask; + target = irq->targets ? __ffs(irq->targets) : 0; + irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = addr & 0x0f; + int i; + u64 val = 0; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + val |= (u64)irq->source << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); + } + return val; +} + +static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = addr & 0x0f; + int i; + unsigned long flags; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + irq->source &= ~((val >> (i * 8)) & 0xff); + if (!irq->source) + irq->pending_latch = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = addr & 0x0f; + int i; + unsigned long flags; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + irq->source |= (val >> (i * 8)) & 0xff; + + if (irq->source) { + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + vgic_put_irq(vcpu->kvm, irq); + } +} + +#define GICC_ARCH_VERSION_V2 0x2 + +/* These are for userland accesses only, there is no guest-facing emulation. 
*/ +static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_vmcr vmcr; + u32 val; + + vgic_get_vmcr(vcpu, &vmcr); + + switch (addr & 0xff) { + case GIC_CPU_CTRL: + val = vmcr.grpen0 << GIC_CPU_CTRL_EnableGrp0_SHIFT; + val |= vmcr.grpen1 << GIC_CPU_CTRL_EnableGrp1_SHIFT; + val |= vmcr.ackctl << GIC_CPU_CTRL_AckCtl_SHIFT; + val |= vmcr.fiqen << GIC_CPU_CTRL_FIQEn_SHIFT; + val |= vmcr.cbpr << GIC_CPU_CTRL_CBPR_SHIFT; + val |= vmcr.eoim << GIC_CPU_CTRL_EOImodeNS_SHIFT; + + break; + case GIC_CPU_PRIMASK: + /* + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the + * PMR field as GICH_VMCR.VMPriMask rather than + * GICC_PMR.Priority, so we expose the upper five bits of + * priority mask to userspace using the lower bits in the + * unsigned long. + */ + val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >> + GICV_PMR_PRIORITY_SHIFT; + break; + case GIC_CPU_BINPOINT: + val = vmcr.bpr; + break; + case GIC_CPU_ALIAS_BINPOINT: + val = vmcr.abpr; + break; + case GIC_CPU_IDENT: + val = ((PRODUCT_ID_KVM << 20) | + (GICC_ARCH_VERSION_V2 << 16) | + IMPLEMENTER_ARM); + break; + default: + return 0; + } + + return val; +} + +static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_vmcr vmcr; + + vgic_get_vmcr(vcpu, &vmcr); + + switch (addr & 0xff) { + case GIC_CPU_CTRL: + vmcr.grpen0 = !!(val & GIC_CPU_CTRL_EnableGrp0); + vmcr.grpen1 = !!(val & GIC_CPU_CTRL_EnableGrp1); + vmcr.ackctl = !!(val & GIC_CPU_CTRL_AckCtl); + vmcr.fiqen = !!(val & GIC_CPU_CTRL_FIQEn); + vmcr.cbpr = !!(val & GIC_CPU_CTRL_CBPR); + vmcr.eoim = !!(val & GIC_CPU_CTRL_EOImodeNS); + + break; + case GIC_CPU_PRIMASK: + /* + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the + * PMR field as GICH_VMCR.VMPriMask rather than + * GICC_PMR.Priority, so we expose the upper five bits of + * priority mask to userspace using the lower bits in the + * unsigned long. + */ + vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) & + GICV_PMR_PRIORITY_MASK; + break; + case GIC_CPU_BINPOINT: + vmcr.bpr = val; + break; + case GIC_CPU_ALIAS_BINPOINT: + vmcr.abpr = val; + break; + } + + vgic_set_vmcr(vcpu, &vmcr); +} + +static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + int n; /* which APRn is this */ + + n = (addr >> 2) & 0x3; + + if (kvm_vgic_global_state.type == VGIC_V2) { + /* GICv2 hardware systems support max. 32 groups */ + if (n != 0) + return 0; + return vcpu->arch.vgic_cpu.vgic_v2.vgic_apr; + } else { + struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; + + if (n > vgic_v3_max_apr_idx(vcpu)) + return 0; + + n = array_index_nospec(n, 4); + + /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ + return vgicv3->vgic_ap1r[n]; + } +} + +static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + int n; /* which APRn is this */ + + n = (addr >> 2) & 0x3; + + if (kvm_vgic_global_state.type == VGIC_V2) { + /* GICv2 hardware systems support max. 
32 groups */ + if (n != 0) + return; + vcpu->arch.vgic_cpu.vgic_v2.vgic_apr = val; + } else { + struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; + + if (n > vgic_v3_max_apr_idx(vcpu)) + return; + + n = array_index_nospec(n, 4); + + /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ + vgicv3->vgic_ap1r[n] = val; + } +} + +static const struct vgic_register_region vgic_v2_dist_registers[] = { + REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_DIST_CTRL, + vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, + NULL, vgic_mmio_uaccess_write_v2_misc, + 12, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, + vgic_mmio_read_group, vgic_mmio_write_group, + NULL, vgic_mmio_uaccess_write_v2_group, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, + vgic_mmio_read_enable, vgic_mmio_write_senable, + NULL, vgic_uaccess_write_senable, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR, + vgic_mmio_read_enable, vgic_mmio_write_cenable, + NULL, vgic_uaccess_write_cenable, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, + vgic_mmio_read_pending, vgic_mmio_write_spending, + NULL, vgic_uaccess_write_spending, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, + vgic_mmio_read_pending, vgic_mmio_write_cpending, + NULL, vgic_uaccess_write_cpending, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, + vgic_mmio_read_active, vgic_mmio_write_sactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, + vgic_mmio_read_active, vgic_mmio_write_cactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, + vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, + 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET, + vgic_mmio_read_target, vgic_mmio_write_target, NULL, NULL, 8, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG, + vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT, + vgic_mmio_read_raz, vgic_mmio_write_sgir, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_CLEAR, + vgic_mmio_read_sgipend, vgic_mmio_write_sgipendc, 16, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_SET, + vgic_mmio_read_sgipend, vgic_mmio_write_sgipends, 16, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), +}; + +static const struct vgic_register_region vgic_v2_cpu_registers[] = { + REGISTER_DESC_WITH_LENGTH(GIC_CPU_CTRL, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_PRIMASK, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_BINPOINT, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_ALIAS_BINPOINT, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO, + vgic_mmio_read_apr, vgic_mmio_write_apr, 16, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), +}; + +unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev) +{ + 
dev->regions = vgic_v2_dist_registers; + dev->nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); + + kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); + + return SZ_4K; +} + +int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + const struct vgic_register_region *region; + struct vgic_io_device iodev; + struct vgic_reg_attr reg_attr; + struct kvm_vcpu *vcpu; + gpa_t addr; + int ret; + + ret = vgic_v2_parse_attr(dev, attr, &reg_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + iodev.regions = vgic_v2_dist_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); + iodev.base_addr = 0; + break; + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + iodev.regions = vgic_v2_cpu_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers); + iodev.base_addr = 0; + break; + default: + return -ENXIO; + } + + /* We only support aligned 32-bit accesses. */ + if (addr & 3) + return -ENXIO; + + region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32)); + if (!region) + return -ENXIO; + + return 0; +} + +int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device dev = { + .regions = vgic_v2_cpu_registers, + .nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers), + .iodev_type = IODEV_CPUIF, + }; + + return vgic_uaccess(vcpu, &dev, is_write, offset, val); +} + +int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device dev = { + .regions = vgic_v2_dist_registers, + .nr_regions = ARRAY_SIZE(vgic_v2_dist_registers), + .iodev_type = IODEV_DIST, + }; + + return vgic_uaccess(vcpu, &dev, is_write, offset, val); +} diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c new file mode 100644 index 000000000000..d2339a2b9fb9 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -0,0 +1,1063 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VGICv3 MMIO handling functions + */ + +#include <linux/bitfield.h> +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> +#include <kvm/iodev.h> +#include <kvm/arm_vgic.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +/* extract @num bytes at @offset bytes offset in data */ +unsigned long extract_bytes(u64 data, unsigned int offset, + unsigned int num) +{ + return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0); +} + +/* allows updates of any half of a 64-bit register (or the whole thing) */ +u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, + unsigned long val) +{ + int lower = (offset & 4) * 8; + int upper = lower + 8 * len - 1; + + reg &= ~GENMASK_ULL(upper, lower); + val &= GENMASK_ULL(len * 8 - 1, 0); + + return reg | ((u64)val << lower); +} + +bool vgic_has_its(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + if (dist->vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3) + return false; + + return dist->has_its; +} + +bool vgic_supports_direct_msis(struct kvm *kvm) +{ + return (kvm_vgic_global_state.has_gicv4_1 || + (kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm))); +} + +/* + * The Revision field in the IIDR has the following meanings: + * + * Revision 2: Interrupt groups are guest-configurable and signaled using + * their configured groups. 
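+ * + * (Userspace observes this revision through GICD_IIDR; the uaccess write + * path below only accepts storing back the exact value that was read.)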
+ */ + +static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; + u32 value = 0; + + switch (addr & 0x0c) { + case GICD_CTLR: + if (vgic->enabled) + value |= GICD_CTLR_ENABLE_SS_G1; + value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS; + if (vgic->nassgireq) + value |= GICD_CTLR_nASSGIreq; + break; + case GICD_TYPER: + value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS; + value = (value >> 5) - 1; + if (vgic_has_its(vcpu->kvm)) { + value |= (INTERRUPT_ID_BITS_ITS - 1) << 19; + value |= GICD_TYPER_LPIS; + } else { + value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19; + } + break; + case GICD_TYPER2: + if (kvm_vgic_global_state.has_gicv4_1) + value = GICD_TYPER2_nASSGIcap; + break; + case GICD_IIDR: + value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) | + (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) | + (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT); + break; + default: + return 0; + } + + return value; +} + +static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + switch (addr & 0x0c) { + case GICD_CTLR: { + bool was_enabled, is_hwsgi; + + mutex_lock(&vcpu->kvm->lock); + + was_enabled = dist->enabled; + is_hwsgi = dist->nassgireq; + + dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; + + /* Not a GICv4.1? No HW SGIs */ + if (!kvm_vgic_global_state.has_gicv4_1) + val &= ~GICD_CTLR_nASSGIreq; + + /* Dist stays enabled? nASSGIreq is RO */ + if (was_enabled && dist->enabled) { + val &= ~GICD_CTLR_nASSGIreq; + val |= FIELD_PREP(GICD_CTLR_nASSGIreq, is_hwsgi); + } + + /* Switching HW SGIs? */ + dist->nassgireq = val & GICD_CTLR_nASSGIreq; + if (is_hwsgi != dist->nassgireq) + vgic_v4_configure_vsgis(vcpu->kvm); + + if (kvm_vgic_global_state.has_gicv4_1 && + was_enabled != dist->enabled) + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_RELOAD_GICv4); + else if (!was_enabled && dist->enabled) + vgic_kick_vcpus(vcpu->kvm); + + mutex_unlock(&vcpu->kvm->lock); + break; + } + case GICD_TYPER: + case GICD_TYPER2: + case GICD_IIDR: + /* This is at best for documentation purposes... */ + return; + } +} + +static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + switch (addr & 0x0c) { + case GICD_TYPER2: + case GICD_IIDR: + if (val != vgic_mmio_read_v3_misc(vcpu, addr, len)) + return -EINVAL; + return 0; + case GICD_CTLR: + /* Not a GICv4.1? No HW SGIs */ + if (!kvm_vgic_global_state.has_gicv4_1) + val &= ~GICD_CTLR_nASSGIreq; + + dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; + dist->nassgireq = val & GICD_CTLR_nASSGIreq; + return 0; + } + + vgic_mmio_write_v3_misc(vcpu, addr, len, val); + return 0; +} + +static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + int intid = VGIC_ADDR_TO_INTID(addr, 64); + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); + unsigned long ret = 0; + + if (!irq) + return 0; + + /* The upper word is RAZ for us. */ + if (!(addr & 4)) + ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len); + + vgic_put_irq(vcpu->kvm, irq); + return ret; +} + +static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + int intid = VGIC_ADDR_TO_INTID(addr, 64); + struct vgic_irq *irq; + unsigned long flags; + + /* The upper word is WI for us since we don't implement Aff3. 
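In GICD_IROUTERn the Aff3 field lives in bits [39:32], i.e. in that upper word.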
*/ + if (addr & 4) + return; + + irq = vgic_get_irq(vcpu->kvm, NULL, intid); + + if (!irq) + return; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + /* We only care about and preserve Aff0, Aff1 and Aff2. */ + irq->mpidr = val & GENMASK(23, 0); + irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); +} + +static unsigned long vgic_mmio_read_v3r_ctlr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + return vgic_cpu->lpis_enabled ? GICR_CTLR_ENABLE_LPIS : 0; +} + + +static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + bool was_enabled = vgic_cpu->lpis_enabled; + + if (!vgic_has_its(vcpu->kvm)) + return; + + vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS; + + if (was_enabled && !vgic_cpu->lpis_enabled) { + vgic_flush_pending_lpis(vcpu); + vgic_its_invalidate_cache(vcpu->kvm); + } + + if (!was_enabled && vgic_cpu->lpis_enabled) + vgic_enable_lpis(vcpu); +} + +static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_redist_region *rdreg = vgic_cpu->rdreg; + int target_vcpu_id = vcpu->vcpu_id; + gpa_t last_rdist_typer = rdreg->base + GICR_TYPER + + (rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE; + u64 value; + + value = (u64)(mpidr & GENMASK(23, 0)) << 32; + value |= ((target_vcpu_id & 0xffff) << 8); + + if (addr == last_rdist_typer) + value |= GICR_TYPER_LAST; + if (vgic_has_its(vcpu->kvm)) + value |= GICR_TYPER_PLPIS; + + return extract_bytes(value, addr & 7, len); +} + +static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); +} + +static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + switch (addr & 0xffff) { + case GICD_PIDR2: + /* report a GICv3 compliant implementation */ + return 0x3b; + } + + return 0; +} + +static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* + * pending state of interrupt is latched in pending_latch variable. + * Userspace will save and restore pending state and line_level + * separately. + * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst + * for handling of ISPENDR and ICPENDR. 
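+ * (Accordingly, the userspace accessors for ICPENDR in the register tables + * below are wired up as read-as-zero / write-ignore.)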
+ */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + bool state = irq->pending_latch; + + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + int err; + + err = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &state); + WARN_ON(err); + } + + if (state) + value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (test_bit(i, &val)) { + /* + * pending_latch is set irrespective of irq type + * (level or edge) to avoid dependency that VM should + * restore irq config before pending info. + */ + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + irq->pending_latch = false; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +/* We want to avoid outer shareable. */ +u64 vgic_sanitise_shareability(u64 field) +{ + switch (field) { + case GIC_BASER_OuterShareable: + return GIC_BASER_InnerShareable; + default: + return field; + } +} + +/* Avoid any inner non-cacheable mapping. */ +u64 vgic_sanitise_inner_cacheability(u64 field) +{ + switch (field) { + case GIC_BASER_CACHE_nCnB: + case GIC_BASER_CACHE_nC: + return GIC_BASER_CACHE_RaWb; + default: + return field; + } +} + +/* Non-cacheable or same-as-inner are OK. */ +u64 vgic_sanitise_outer_cacheability(u64 field) +{ + switch (field) { + case GIC_BASER_CACHE_SameAsInner: + case GIC_BASER_CACHE_nC: + return field; + default: + return GIC_BASER_CACHE_nC; + } +} + +u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, + u64 (*sanitise_fn)(u64)) +{ + u64 field = (reg & field_mask) >> field_shift; + + field = sanitise_fn(field) << field_shift; + return (reg & ~field_mask) | field; +} + +#define PROPBASER_RES0_MASK \ + (GENMASK_ULL(63, 59) | GENMASK_ULL(55, 52) | GENMASK_ULL(6, 5)) +#define PENDBASER_RES0_MASK \ + (BIT_ULL(63) | GENMASK_ULL(61, 59) | GENMASK_ULL(55, 52) | \ + GENMASK_ULL(15, 12) | GENMASK_ULL(6, 0)) + +static u64 vgic_sanitise_pendbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GICR_PENDBASER_SHAREABILITY_MASK, + GICR_PENDBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GICR_PENDBASER_INNER_CACHEABILITY_MASK, + GICR_PENDBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GICR_PENDBASER_OUTER_CACHEABILITY_MASK, + GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + reg &= ~PENDBASER_RES0_MASK; + + return reg; +} + +static u64 vgic_sanitise_propbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GICR_PROPBASER_SHAREABILITY_MASK, + GICR_PROPBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GICR_PROPBASER_INNER_CACHEABILITY_MASK, + GICR_PROPBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GICR_PROPBASER_OUTER_CACHEABILITY_MASK, + GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + reg &= ~PROPBASER_RES0_MASK; + return reg; +} + +static unsigned long vgic_mmio_read_propbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_dist *dist = 
&vcpu->kvm->arch.vgic; + + return extract_bytes(dist->propbaser, addr & 7, len); +} + +static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 old_propbaser, propbaser; + + /* Storing a value with LPIs already enabled is undefined */ + if (vgic_cpu->lpis_enabled) + return; + + do { + old_propbaser = READ_ONCE(dist->propbaser); + propbaser = old_propbaser; + propbaser = update_64bit_reg(propbaser, addr & 4, len, val); + propbaser = vgic_sanitise_propbaser(propbaser); + } while (cmpxchg64(&dist->propbaser, old_propbaser, + propbaser) != old_propbaser); +} + +static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 value = vgic_cpu->pendbaser; + + value &= ~GICR_PENDBASER_PTZ; + + return extract_bytes(value, addr & 7, len); +} + +static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 old_pendbaser, pendbaser; + + /* Storing a value with LPIs already enabled is undefined */ + if (vgic_cpu->lpis_enabled) + return; + + do { + old_pendbaser = READ_ONCE(vgic_cpu->pendbaser); + pendbaser = old_pendbaser; + pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); + pendbaser = vgic_sanitise_pendbaser(pendbaser); + } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser, + pendbaser) != old_pendbaser); +} + +/* + * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the + * redistributors, while SPIs are covered by registers in the distributor + * block. Trying to set private IRQs in this block gets ignored. + * We take some special care here to fix the calculation of the register + * offset. 
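+ * + * As an illustration: with bpi == 1, the first (bpi * 32) / 8 == 4 bytes of + * such a register frame cover the 32 private interrupts and are RAZ/WI here, + * while the window starting at offset 0x4 maps the SPIs from INTID 32 up.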
+ */ +#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, ur, uw, bpi, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = bpi, \ + .len = (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \ + .access_flags = acc, \ + .read = vgic_mmio_read_raz, \ + .write = vgic_mmio_write_wi, \ + }, { \ + .reg_offset = off + (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \ + .bits_per_irq = bpi, \ + .len = (bpi * (1024 - VGIC_NR_PRIVATE_IRQS)) / 8, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + .uaccess_read = ur, \ + .uaccess_write = uw, \ + } + +static const struct vgic_register_region vgic_v3_dist_registers[] = { + REGISTER_DESC_WITH_LENGTH_UACCESS(GICD_CTLR, + vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, + NULL, vgic_mmio_uaccess_write_v3_misc, + 16, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICD_STATUSR, + vgic_mmio_read_rao, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR, + vgic_mmio_read_group, vgic_mmio_write_group, NULL, NULL, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER, + vgic_mmio_read_enable, vgic_mmio_write_senable, + NULL, vgic_uaccess_write_senable, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER, + vgic_mmio_read_enable, vgic_mmio_write_cenable, + NULL, vgic_uaccess_write_cenable, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, + vgic_mmio_read_pending, vgic_mmio_write_spending, + vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, + vgic_mmio_read_pending, vgic_mmio_write_cpending, + vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, + vgic_mmio_read_active, vgic_mmio_write_sactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, + vgic_mmio_read_active, vgic_mmio_write_cactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, + 1, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, + vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, + 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR, + vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 8, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR, + vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR, + vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER, + vgic_mmio_read_irouter, vgic_mmio_write_irouter, NULL, NULL, 64, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICD_IDREGS, + vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, + VGIC_ACCESS_32bit), +}; + +static const struct vgic_register_region vgic_v3_rd_registers[] = { + /* RD_base registers */ + REGISTER_DESC_WITH_LENGTH(GICR_CTLR, + vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_STATUSR, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_IIDR, + vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_TYPER, + vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, + VGIC_ACCESS_64bit | 
VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_WAKER, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, + vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER, + vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_IDREGS, + vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, + VGIC_ACCESS_32bit), + /* SGI_base registers */ + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0, + vgic_mmio_read_group, vgic_mmio_write_group, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISENABLER0, + vgic_mmio_read_enable, vgic_mmio_write_senable, + NULL, vgic_uaccess_write_senable, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICENABLER0, + vgic_mmio_read_enable, vgic_mmio_write_cenable, + NULL, vgic_uaccess_write_cenable, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0, + vgic_mmio_read_pending, vgic_mmio_write_spending, + vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0, + vgic_mmio_read_pending, vgic_mmio_write_cpending, + vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0, + vgic_mmio_read_active, vgic_mmio_write_sactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0, + vgic_mmio_read_active, vgic_mmio_write_cactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0, + vgic_mmio_read_priority, vgic_mmio_write_priority, 32, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICFGR0, + vgic_mmio_read_config, vgic_mmio_write_config, 8, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGRPMODR0, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_NSACR, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), +}; + +unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev) +{ + dev->regions = vgic_v3_dist_registers; + dev->nr_regions = ARRAY_SIZE(vgic_v3_dist_registers); + + kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); + + return SZ_64K; +} + +/** + * vgic_register_redist_iodev - register a single redist iodev + * @vcpu: The VCPU to which the redistributor belongs + * + * Register a KVM iodev for this VCPU's redistributor using the address + * provided. + * + * Return 0 on success, -ERRNO otherwise. + */ +int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *vgic = &kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; + struct vgic_redist_region *rdreg; + gpa_t rd_base; + int ret; + + if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) + return 0; + + /* + * We may be creating VCPUs before having set the base address for the + * redistributor region, in which case we will come back to this + * function for all VCPUs when the base address is set. Just return + * without doing any work for now. 
+ */ + rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions); + if (!rdreg) + return 0; + + if (!vgic_v3_check_base(kvm)) + return -EINVAL; + + vgic_cpu->rdreg = rdreg; + + rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE; + + kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); + rd_dev->base_addr = rd_base; + rd_dev->iodev_type = IODEV_REDIST; + rd_dev->regions = vgic_v3_rd_registers; + rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); + rd_dev->redist_vcpu = vcpu; + + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, + 2 * SZ_64K, &rd_dev->dev); + mutex_unlock(&kvm->slots_lock); + + if (ret) + return ret; + + rdreg->free_index++; + return 0; +} + +static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) +{ + struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; + + kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev); +} + +static int vgic_register_all_redist_iodevs(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int c, ret = 0; + + kvm_for_each_vcpu(c, vcpu, kvm) { + ret = vgic_register_redist_iodev(vcpu); + if (ret) + break; + } + + if (ret) { + /* The current c failed, so we start with the previous one. */ + mutex_lock(&kvm->slots_lock); + for (c--; c >= 0; c--) { + vcpu = kvm_get_vcpu(kvm, c); + vgic_unregister_redist_iodev(vcpu); + } + mutex_unlock(&kvm->slots_lock); + } + + return ret; +} + +/** + * vgic_v3_insert_redist_region - Insert a new redistributor region + * + * Performs various checks before inserting the rdist region in the list. + * Those tests depend on whether the size of the rdist region is known + * (i.e. count != 0). The list is sorted by rdist region index. + * + * @kvm: kvm handle + * @index: redist region index + * @base: base of the new rdist region + * @count: number of redistributors the region is made of (0 in the old style + * single region, whose size is induced from the number of vcpus) + * + * Return 0 on success, < 0 otherwise + */ +static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index, + gpa_t base, uint32_t count) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + struct list_head *rd_regions = &d->rd_regions; + size_t size = count * KVM_VGIC_V3_REDIST_SIZE; + int ret; + + /* single rdist region already set? */ + if (!count && !list_empty(rd_regions)) + return -EINVAL; + + /* cross the end of memory? */ + if (base + size < base) + return -EINVAL; + + if (list_empty(rd_regions)) { + if (index != 0) + return -EINVAL; + } else { + rdreg = list_last_entry(rd_regions, + struct vgic_redist_region, list); + if (index != rdreg->index + 1) + return -EINVAL; + + /* Cannot add an explicitly sized region after a legacy region */ + if (!rdreg->count) + return -EINVAL; + } + + /* + * For legacy single-region redistributor regions (!count), + * check that the redistributor region does not overlap with the + * distributor's address space. + */ + if (!count && !IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) && + vgic_dist_overlap(kvm, base, size)) + return -EINVAL; + + /* collision with any other rdist region? 
*/ + if (vgic_v3_rdist_overlap(kvm, base, size)) + return -EINVAL; + + rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL); + if (!rdreg) + return -ENOMEM; + + rdreg->base = VGIC_ADDR_UNDEF; + + ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K); + if (ret) + goto free; + + rdreg->base = base; + rdreg->count = count; + rdreg->free_index = 0; + rdreg->index = index; + + list_add_tail(&rdreg->list, rd_regions); + return 0; +free: + kfree(rdreg); + return ret; +} + +int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) +{ + int ret; + + ret = vgic_v3_insert_redist_region(kvm, index, addr, count); + if (ret) + return ret; + + /* + * Register iodevs for each existing VCPU. Adding more VCPUs + * afterwards will register the iodevs when needed. + */ + ret = vgic_register_all_redist_iodevs(kvm); + if (ret) + return ret; + + return 0; +} + +int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + const struct vgic_register_region *region; + struct vgic_io_device iodev; + struct vgic_reg_attr reg_attr; + struct kvm_vcpu *vcpu; + gpa_t addr; + int ret; + + ret = vgic_v3_parse_attr(dev, attr, &reg_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + iodev.regions = vgic_v3_dist_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v3_dist_registers); + iodev.base_addr = 0; + break; + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { + iodev.regions = vgic_v3_rd_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); + iodev.base_addr = 0; + break; + } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 reg, id; + + id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); + return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, &reg); + } + default: + return -ENXIO; + } + + /* We only support aligned 32-bit accesses. */ + if (addr & 3) + return -ENXIO; + + region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32)); + if (!region) + return -ENXIO; + + return 0; +} +/* + * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI + * generation register ICC_SGI1R_EL1) with a given VCPU. + * If the VCPU's MPIDR matches, return the level0 affinity, otherwise + * return -1. + */ +static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) +{ + unsigned long affinity; + int level0; + + /* + * Split the current VCPU's MPIDR into affinity level 0 and the + * rest as this is what we have to compare against. + */ + affinity = kvm_vcpu_get_mpidr_aff(vcpu); + level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); + affinity &= ~MPIDR_LEVEL_MASK; + + /* bail out if the upper three levels don't match */ + if (sgi_aff != affinity) + return -1; + + /* Is this VCPU's bit set in the mask? */ + if (!(sgi_cpu_mask & BIT(level0))) + return -1; + + return level0; +} + +/* + * The ICC_SGI* registers encode the affinity differently from the MPIDR, + * so provide a wrapper to use the existing defines to isolate a certain + * affinity level. + */ +#define SGI_AFFINITY_LEVEL(reg, level) \ + ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ + >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) + +/** + * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs + * @vcpu: The VCPU requesting an SGI + * @reg: The value written into ICC_{ASGI1,SGI0,SGI1}R by that VCPU + * @allow_group1: Does the sysreg access allow generation of G1 SGIs + * + * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register. + * This will trap in sys_regs.c and call this function. 
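+ * + * Illustrative example (field values invented): a write with Aff3.Aff2.Aff1 = + * 0.0.1, TargetList = 0b101 and IRM = 0 requests this SGI for the VCPUs whose + * MPIDRs are 0.0.1.0 and 0.0.1.2, while IRM = 1 sends it to every VCPU except + * the caller.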
+ * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the + * target processors as well as a bitmask of 16 Aff0 CPUs. + * If the interrupt routing mode bit is not set, we iterate over all VCPUs to + * check for matching ones. If this bit is set, we signal all, but not the + * calling VCPU. + */ +void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *c_vcpu; + u16 target_cpus; + u64 mpidr; + int sgi, c; + int vcpu_id = vcpu->vcpu_id; + bool broadcast; + unsigned long flags; + + sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; + broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); + target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; + mpidr = SGI_AFFINITY_LEVEL(reg, 3); + mpidr |= SGI_AFFINITY_LEVEL(reg, 2); + mpidr |= SGI_AFFINITY_LEVEL(reg, 1); + + /* + * We iterate over all VCPUs to find the MPIDRs matching the request. + * If we have handled one CPU, we clear its bit to detect early + * if we are already finished. This avoids iterating through all + * VCPUs when most of the time we just signal a single VCPU. + */ + kvm_for_each_vcpu(c, c_vcpu, kvm) { + struct vgic_irq *irq; + + /* Exit early if we have dealt with all requested CPUs */ + if (!broadcast && target_cpus == 0) + break; + + /* Don't signal the calling VCPU */ + if (broadcast && c == vcpu_id) + continue; + + if (!broadcast) { + int level0; + + level0 = match_mpidr(mpidr, target_cpus, c_vcpu); + if (level0 == -1) + continue; + + /* remove this matching VCPU from the mask */ + target_cpus &= ~BIT(level0); + } + + irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + /* + * An access targeting Group0 SGIs can only generate + * those, while an access targeting Group1 SGIs can + * generate interrupts of either group. + */ + if (!irq->group || allow_group1) { + if (!irq->hw) { + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + /* HW SGI? 
Ask the GIC to inject it */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + true); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + } else { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + + vgic_put_irq(vcpu->kvm, irq); + } +} + +int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device dev = { + .regions = vgic_v3_dist_registers, + .nr_regions = ARRAY_SIZE(vgic_v3_dist_registers), + }; + + return vgic_uaccess(vcpu, &dev, is_write, offset, val); +} + +int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device rd_dev = { + .regions = vgic_v3_rd_registers, + .nr_regions = ARRAY_SIZE(vgic_v3_rd_registers), + }; + + return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val); +} + +int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, + u32 intid, u64 *val) +{ + if (intid % 32) + return -EINVAL; + + if (is_write) + vgic_write_irq_line_level_info(vcpu, intid, *val); + else + *val = vgic_read_irq_line_level_info(vcpu, intid); + + return 0; +} diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c new file mode 100644 index 000000000000..b2d73fc0d1ef --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -0,0 +1,1088 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VGIC MMIO handling functions + */ + +#include <linux/bitops.h> +#include <linux/bsearch.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <kvm/iodev.h> +#include <kvm/arm_arch_timer.h> +#include <kvm/arm_vgic.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return 0; +} + +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return -1UL; +} + +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val) +{ + /* Ignore */ +} + +int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val) +{ + /* Ignore */ + return 0; +} + +unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + if (irq->group) + value |= BIT(i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +static void vgic_update_vsgi(struct vgic_irq *irq) +{ + WARN_ON(its_prop_update_vsgi(irq->host_irq, irq->priority, irq->group)); +} + +void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->group = !!(val & BIT(i)); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + vgic_update_vsgi(irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } else { + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } + + vgic_put_irq(vcpu->kvm, irq); + } +} + +/* + * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value + * of the enabled bit, so there is only one function for both here. 
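+ * A 32-bit read of either register thus returns one bit per INTID in the + * accessed window, set when that interrupt is currently enabled.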
+ */ +unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + if (irq->enabled) + value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + if (!irq->enabled) { + struct irq_data *data; + + irq->enabled = true; + data = &irq_to_desc(irq->host_irq)->irq_data; + while (irqd_irq_disabled(data)) + enable_irq(irq->host_irq); + } + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + continue; + } else if (vgic_irq_is_mapped_level(irq)) { + bool was_high = irq->line_level; + + /* + * We need to update the state of the interrupt because + * the guest might have changed the state of the device + * while the interrupt was disabled at the VGIC level. + */ + irq->line_level = vgic_get_phys_line_level(irq); + /* + * Deactivate the physical interrupt so the GIC will let + * us know when it is asserted again. + */ + if (!irq->active && was_high && !irq->line_level) + vgic_irq_set_phys_active(irq, false); + } + irq->enabled = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + vgic_put_irq(vcpu->kvm, irq); + } +} + +void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw && vgic_irq_is_sgi(irq->intid) && irq->enabled) + disable_irq_nosync(irq->host_irq); + + irq->enabled = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->enabled = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->enabled = false; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, 
intid + i); + unsigned long flags; + bool val; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + int err; + + val = false; + err = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &val); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + } else { + val = irq_is_pending(irq); + } + + value |= ((u32)val << i); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) +{ + return (vgic_irq_is_sgi(irq->intid) && + vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2); +} + +void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* GICD_ISPENDR0 SGI bits are WI */ + if (is_vgic_v2_sgi(vcpu, irq)) { + vgic_put_irq(vcpu->kvm, irq); + continue; + } + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + /* HW SGI? Ask the GIC to inject it */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + true); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + continue; + } + + irq->pending_latch = true; + if (irq->hw) + vgic_irq_set_phys_active(irq, true); + + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + + /* + * GICv2 SGIs are terribly broken. We can't restore + * the source of the interrupt, so just pick the vcpu + * itself as the source... + */ + if (is_vgic_v2_sgi(vcpu, irq)) + irq->source |= BIT(vcpu->vcpu_id); + + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq) +{ + irq->pending_latch = false; + + /* + * We don't want the guest to effectively mask the physical + * interrupt by doing a write to SPENDR followed by a write to + * CPENDR for HW interrupts, so we clear the active state on + * the physical side if the virtual interrupt is not active. + * This may lead to taking an additional interrupt on the + * host, but that should not be a problem as the worst that + * can happen is an additional vgic injection. We also clear + * the pending state to maintain proper semantics for edge HW + * interrupts. 
+ */ + vgic_irq_set_phys_pending(irq, false); + if (!irq->active) + vgic_irq_set_phys_active(irq, false); +} + +void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* GICD_ICPENDR0 SGI bits are WI */ + if (is_vgic_v2_sgi(vcpu, irq)) { + vgic_put_irq(vcpu->kvm, irq); + continue; + } + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + /* HW SGI? Ask the GIC to clear its pending bit */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + false); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + continue; + } + + if (irq->hw) + vgic_hw_irq_cpending(vcpu, irq); + else + irq->pending_latch = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + /* + * More fun with GICv2 SGIs! If we're clearing one of them + * from userspace, which source vcpu to clear? Let's not + * even think of it, and blow the whole set. + */ + if (is_vgic_v2_sgi(vcpu, irq)) + irq->source = 0; + + irq->pending_latch = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +/* + * If we are fiddling with an IRQ's active state, we have to make sure the IRQ + * is not queued on some running VCPU's LRs, because then the change to the + * active state can be overwritten when the VCPU's state is synced coming back + * from the guest. + * + * For shared interrupts as well as GICv3 private interrupts, we have to + * stop all the VCPUs because interrupts can be migrated while we don't hold + * the IRQ locks and we don't want to be chasing moving targets. + * + * For GICv2 private interrupts we don't have to do anything because + * userspace accesses to the VGIC state already require all VCPUs to be + * stopped, and only the VCPU itself can modify its private interrupts + * active state, which guarantees that the VCPU is not running. + */ +static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid) +{ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || + intid >= VGIC_NR_PRIVATE_IRQS) + kvm_arm_halt_guest(vcpu->kvm); +} + +/* See vgic_access_active_prepare */ +static void vgic_access_active_finish(struct kvm_vcpu *vcpu, u32 intid) +{ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || + intid >= VGIC_NR_PRIVATE_IRQS) + kvm_arm_resume_guest(vcpu->kvm); +} + +static unsigned long __vgic_mmio_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* + * Even for HW interrupts, don't evaluate the HW state as + * all the guest is interested in is the virtual state. 
+ */ + if (irq->active) + value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 val; + + mutex_lock(&vcpu->kvm->lock); + vgic_access_active_prepare(vcpu, intid); + + val = __vgic_mmio_read_active(vcpu, addr, len); + + vgic_access_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); + + return val; +} + +unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __vgic_mmio_read_active(vcpu, addr, len); +} + +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + bool active, bool is_uaccess) +{ + if (is_uaccess) + return; + + irq->active = active; + vgic_irq_set_phys_active(irq, active); +} + +static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + bool active) +{ + unsigned long flags; + struct kvm_vcpu *requester_vcpu = kvm_get_running_vcpu(); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw && !vgic_irq_is_sgi(irq->intid)) { + vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu); + } else if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + /* + * GICv4.1 VSGI feature doesn't track an active state, + * so let's not kid ourselves, there is nothing we can + * do here. + */ + irq->active = false; + } else { + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u8 active_source; + + irq->active = active; + + /* + * The GICv2 architecture indicates that the source CPUID for + * an SGI should be provided during an EOI which implies that + * the active state is stored somewhere, but at the same time + * this state is not architecturally exposed anywhere and we + * have no way of knowing the right source. + * + * This may lead to a VCPU not being able to receive + * additional instances of a particular SGI after migration + * for a GICv2 VM on some GIC implementations. Oh well. + */ + active_source = (requester_vcpu) ? 
requester_vcpu->vcpu_id : 0; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V2 && + active && vgic_irq_is_sgi(irq->intid)) + irq->active_source = active_source; + } + + if (irq->active) + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + else + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); +} + +static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + vgic_mmio_change_active(vcpu, irq, false); + vgic_put_irq(vcpu->kvm, irq); + } +} + +void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + + mutex_lock(&vcpu->kvm->lock); + vgic_access_active_prepare(vcpu, intid); + + __vgic_mmio_write_cactive(vcpu, addr, len, val); + + vgic_access_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); +} + +int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __vgic_mmio_write_cactive(vcpu, addr, len, val); + return 0; +} + +static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + vgic_mmio_change_active(vcpu, irq, true); + vgic_put_irq(vcpu->kvm, irq); + } +} + +void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + + mutex_lock(&vcpu->kvm->lock); + vgic_access_active_prepare(vcpu, intid); + + __vgic_mmio_write_sactive(vcpu, addr, len, val); + + vgic_access_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); +} + +int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __vgic_mmio_write_sactive(vcpu, addr, len, val); + return 0; +} + +unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + int i; + u64 val = 0; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + val |= (u64)irq->priority << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); + } + + return val; +} + +/* + * We currently don't handle changing the priority of an interrupt that + * is already pending on a VCPU. If there is a need for this, we would + * need to make this VCPU exit and re-evaluate the priorities, potentially + * leading to this interrupt getting presented now to the guest (if it has + * been masked by the priority mask before). 
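+ * + * Also note that only the top VGIC_PRI_BITS bits of each priority byte are + * kept below; assuming VGIC_PRI_BITS is 5, a guest write of 0xb3 would be + * stored (and read back) as 0xb0.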
+ */ +void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + int i; + unsigned long flags; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + /* Narrow the priority range to what we actually support */ + irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) + vgic_update_vsgi(irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } +} + +unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 2); + u32 value = 0; + int i; + + for (i = 0; i < len * 4; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + if (irq->config == VGIC_CONFIG_EDGE) + value |= (2U << (i * 2)); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +void vgic_mmio_write_config(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 2); + int i; + unsigned long flags; + + for (i = 0; i < len * 4; i++) { + struct vgic_irq *irq; + + /* + * The configuration cannot be changed for SGIs in general, + * for PPIs this is IMPLEMENTATION DEFINED. The arch timer + * code relies on PPIs being level triggered, so we also + * make them read-only here. + */ + if (intid + i < VGIC_NR_PRIVATE_IRQS) + continue; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (test_bit(i * 2 + 1, &val)) + irq->config = VGIC_CONFIG_EDGE; + else + irq->config = VGIC_CONFIG_LEVEL; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) +{ + int i; + u64 val = 0; + int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + + for (i = 0; i < 32; i++) { + struct vgic_irq *irq; + + if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) + continue; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level) + val |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return val; +} + +void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, + const u64 val) +{ + int i; + int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + unsigned long flags; + + for (i = 0; i < 32; i++) { + struct vgic_irq *irq; + bool new_level; + + if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) + continue; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* + * Line level is set irrespective of irq type + * (level or edge) to avoid dependency that VM should + * restore irq config before line level. 
+ */ + new_level = !!(val & (1U << i)); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->line_level = new_level; + if (new_level) + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + else + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } +} + +static int match_region(const void *key, const void *elt) +{ + const unsigned int offset = (unsigned long)key; + const struct vgic_register_region *region = elt; + + if (offset < region->reg_offset) + return -1; + + if (offset >= region->reg_offset + region->len) + return 1; + + return 0; +} + +const struct vgic_register_region * +vgic_find_mmio_region(const struct vgic_register_region *regions, + int nr_regions, unsigned int offset) +{ + return bsearch((void *)(uintptr_t)offset, regions, nr_regions, + sizeof(regions[0]), match_region); +} + +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_set_vmcr(vcpu, vmcr); + else + vgic_v3_set_vmcr(vcpu, vmcr); +} + +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_get_vmcr(vcpu, vmcr); + else + vgic_v3_get_vmcr(vcpu, vmcr); +} + +/* + * kvm_mmio_read_buf() returns a value in a format where it can be converted + * to a byte array and be directly observed as the guest wanted it to appear + * in memory if it had done the store itself, which is LE for the GIC, as the + * guest knows the GIC is always LE. + * + * We convert this value to the CPUs native format to deal with it as a data + * value. + */ +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len) +{ + unsigned long data = kvm_mmio_read_buf(val, len); + + switch (len) { + case 1: + return data; + case 2: + return le16_to_cpu(data); + case 4: + return le32_to_cpu(data); + default: + return le64_to_cpu(data); + } +} + +/* + * kvm_mmio_write_buf() expects a value in a format such that if converted to + * a byte array it is observed as the guest would see it if it could perform + * the load directly. Since the GIC is LE, and the guest knows this, the + * guest expects a value in little endian format. + * + * We convert the data value from the CPUs native format to LE so that the + * value is returned in the proper format. + */ +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, + unsigned long data) +{ + switch (len) { + case 1: + break; + case 2: + data = cpu_to_le16(data); + break; + case 4: + data = cpu_to_le32(data); + break; + default: + data = cpu_to_le64(data); + } + + kvm_mmio_write_buf(buf, len, data); +} + +static +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) +{ + return container_of(dev, struct vgic_io_device, dev); +} + +static bool check_region(const struct kvm *kvm, + const struct vgic_register_region *region, + gpa_t addr, int len) +{ + int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + + switch (len) { + case sizeof(u8): + flags = VGIC_ACCESS_8bit; + break; + case sizeof(u32): + flags = VGIC_ACCESS_32bit; + break; + case sizeof(u64): + flags = VGIC_ACCESS_64bit; + break; + default: + return false; + } + + if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) { + if (!region->bits_per_irq) + return true; + + /* Do we access a non-allocated IRQ? 
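+ * For instance (illustrative numbers only): with bits_per_irq == 1,
+ * as for the enable registers, and 256 SPIs configured (nr_irqs == 288),
+ * a 32-bit access at byte offset 36 into the register group resolves to
+ * INTID (36 & 0x7f) * 8 == 288 and fails the check below.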
*/ + return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs; + } + + return false; +} + +const struct vgic_register_region * +vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, + gpa_t addr, int len) +{ + const struct vgic_register_region *region; + + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, + addr - iodev->base_addr); + if (!region || !check_region(vcpu->kvm, region, addr, len)) + return NULL; + + return region; +} + +static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, u32 *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + struct kvm_vcpu *r_vcpu; + + region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32)); + if (!region) { + *val = 0; + return 0; + } + + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; + if (region->uaccess_read) + *val = region->uaccess_read(r_vcpu, addr, sizeof(u32)); + else + *val = region->read(r_vcpu, addr, sizeof(u32)); + + return 0; +} + +static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, const u32 *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + struct kvm_vcpu *r_vcpu; + + region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32)); + if (!region) + return 0; + + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; + if (region->uaccess_write) + return region->uaccess_write(r_vcpu, addr, sizeof(u32), *val); + + region->write(r_vcpu, addr, sizeof(u32), *val); + return 0; +} + +/* + * Userland access to VGIC registers. + */ +int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, + bool is_write, int offset, u32 *val) +{ + if (is_write) + return vgic_uaccess_write(vcpu, &dev->dev, offset, val); + else + return vgic_uaccess_read(vcpu, &dev->dev, offset, val); +} + +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + unsigned long data = 0; + + region = vgic_get_mmio_region(vcpu, iodev, addr, len); + if (!region) { + memset(val, 0, len); + return 0; + } + + switch (iodev->iodev_type) { + case IODEV_CPUIF: + data = region->read(vcpu, addr, len); + break; + case IODEV_DIST: + data = region->read(vcpu, addr, len); + break; + case IODEV_REDIST: + data = region->read(iodev->redist_vcpu, addr, len); + break; + case IODEV_ITS: + data = region->its_read(vcpu->kvm, iodev->its, addr, len); + break; + } + + vgic_data_host_to_mmio_bus(val, len, data); + return 0; +} + +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + unsigned long data = vgic_data_mmio_bus_to_host(val, len); + + region = vgic_get_mmio_region(vcpu, iodev, addr, len); + if (!region) + return 0; + + switch (iodev->iodev_type) { + case IODEV_CPUIF: + region->write(vcpu, addr, len, data); + break; + case IODEV_DIST: + region->write(vcpu, addr, len, data); + break; + case IODEV_REDIST: + region->write(iodev->redist_vcpu, addr, len, data); + break; + case IODEV_ITS: + region->its_write(vcpu->kvm, iodev->its, addr, len, data); + break; + } + + return 0; +} + +struct kvm_io_device_ops kvm_io_gic_ops = { + .read = dispatch_mmio_read, + .write = dispatch_mmio_write, +}; + +int 
vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, + enum vgic_type type) +{ + struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev; + int ret = 0; + unsigned int len; + + switch (type) { + case VGIC_V2: + len = vgic_v2_init_dist_iodev(io_device); + break; + case VGIC_V3: + len = vgic_v3_init_dist_iodev(io_device); + break; + default: + BUG_ON(1); + } + + io_device->base_addr = dist_base_address; + io_device->iodev_type = IODEV_DIST; + io_device->redist_vcpu = NULL; + + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, + len, &io_device->dev); + mutex_unlock(&kvm->slots_lock); + + return ret; +} diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h new file mode 100644 index 000000000000..fefcca2b14dc --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-mmio.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ +#ifndef __KVM_ARM_VGIC_MMIO_H__ +#define __KVM_ARM_VGIC_MMIO_H__ + +struct vgic_register_region { + unsigned int reg_offset; + unsigned int len; + unsigned int bits_per_irq; + unsigned int access_flags; + union { + unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + unsigned long (*its_read)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len); + }; + union { + void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + void (*its_write)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val); + }; + unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + union { + int (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + int (*uaccess_its_write)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val); + }; +}; + +extern struct kvm_io_device_ops kvm_io_gic_ops; + +#define VGIC_ACCESS_8bit 1 +#define VGIC_ACCESS_32bit 2 +#define VGIC_ACCESS_64bit 4 + +/* + * Generate a mask that covers the number of bytes required to address + * up to 1024 interrupts, each represented by <bits> bits. This assumes + * that <bits> is a power of two. + */ +#define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1) + +/* + * (addr & mask) gives us the _byte_ offset for the INT ID. + * We multiply this by 8 to get the _bit_ offset, then divide this by + * the number of bits to learn the actual INT ID. + * But instead of a division (which requires a "long long div" implementation), + * we shift by the binary logarithm of <bits>. + * This assumes that <bits> is a power of two. + */ +#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ + 8 >> ilog2(bits)) + +/* + * Some VGIC registers store per-IRQ information, with a different number + * of bits per IRQ. For those registers this macro is used. + * The _WITH_LENGTH version instantiates registers with a fixed length + * and is mutually exclusive with the _PER_IRQ version. 
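+ *
+ * Worked example (figures for illustration only): a register group declared
+ * with REGISTER_DESC_WITH_BITS_PER_IRQ and bpi == 8, such as the priority
+ * registers, gets len == 8 * 1024 / 8 == 1024 bytes, and VGIC_ADDR_TO_INTID()
+ * above then maps byte offset 0x20 within it to (0x20 & 0x3ff) * 8 >> 3 == 32,
+ * i.e. INTID 32.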
+ */ +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, ur, uw, bpi, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = bpi, \ + .len = bpi * 1024 / 8, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + .uaccess_read = ur, \ + .uaccess_write = uw, \ + } + +#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = 0, \ + .len = length, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + } + +#define REGISTER_DESC_WITH_LENGTH_UACCESS(off, rd, wr, urd, uwr, length, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = 0, \ + .len = length, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + .uaccess_read = urd, \ + .uaccess_write = uwr, \ + } + +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); + +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, + unsigned long data); + +unsigned long extract_bytes(u64 data, unsigned int offset, + unsigned int num); + +u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + +int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + +unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + +void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + +unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, + gpa_t 
addr, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_config(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, + bool is_write, int offset, u32 *val); + +u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid); + +void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, + const u64 val); + +unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); + +unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); + +u64 vgic_sanitise_outer_cacheability(u64 reg); +u64 vgic_sanitise_inner_cacheability(u64 reg); +u64 vgic_sanitise_shareability(u64 reg); +u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, + u64 (*sanitise_fn)(u64)); + +/* Find the proper register handler entry given a certain address offset */ +const struct vgic_register_region * +vgic_find_mmio_region(const struct vgic_register_region *regions, + int nr_regions, unsigned int offset); + +#endif diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c new file mode 100644 index 000000000000..ebf53a4e1296 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -0,0 +1,504 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ + +#include <linux/irqchip/arm-gic.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_mmu.h> + +#include "vgic.h" + +static inline void vgic_v2_write_lr(int lr, u32 val) +{ + void __iomem *base = kvm_vgic_global_state.vctrl_base; + + writel_relaxed(val, base + GICH_LR0 + (lr * 4)); +} + +void vgic_v2_init_lrs(void) +{ + int i; + + for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) + vgic_v2_write_lr(i, 0); +} + +void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; + + cpuif->vgic_hcr |= GICH_HCR_UIE; +} + +static bool lr_signals_eoi_mi(u32 lr_val) +{ + return !(lr_val & GICH_LR_STATE) && (lr_val & GICH_LR_EOI) && + !(lr_val & GICH_LR_HW); +} + +/* + * transfer the content of the LRs back into the corresponding ap_list: + * - active bit is transferred as is + * - pending bit is + * - transferred as is in case of edge sensitive IRQs + * - set to the line-level (resample time) for level sensitive IRQs + */ +void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; + int lr; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + + cpuif->vgic_hcr &= ~GICH_HCR_UIE; + + for (lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) { + u32 val = cpuif->vgic_lr[lr]; + u32 cpuid, intid = val & GICH_LR_VIRTUALID; + struct vgic_irq *irq; + + /* Extract the source vCPU id from the LR */ + cpuid = val & GICH_LR_PHYSID_CPUID; + cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; + cpuid &= 7; + + /* Notify fds when the guest EOI'ed a level-triggered SPI */ + if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + + raw_spin_lock(&irq->irq_lock); + + /* Always preserve the active bit */ + irq->active = !!(val & GICH_LR_ACTIVE_BIT); + + if (irq->active && vgic_irq_is_sgi(intid)) + irq->active_source = cpuid; + + /* Edge is the only case where we preserve the pending bit */ + if (irq->config == VGIC_CONFIG_EDGE && + 
(val & GICH_LR_PENDING_BIT)) { + irq->pending_latch = true; + + if (vgic_irq_is_sgi(intid)) + irq->source |= (1 << cpuid); + } + + /* + * Clear soft pending state when level irqs have been acked. + */ + if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE)) + irq->pending_latch = false; + + /* + * Level-triggered mapped IRQs are special because we only + * observe rising edges as input to the VGIC. + * + * If the guest never acked the interrupt we have to sample + * the physical line and set the line level, because the + * device state could have changed or we simply need to + * process the still pending interrupt later. + * + * If this causes us to lower the level, we have to also clear + * the physical active state, since we will otherwise never be + * told when the interrupt becomes asserted again. + */ + if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) { + irq->line_level = vgic_get_phys_line_level(irq); + + if (!irq->line_level) + vgic_irq_set_phys_active(irq, false); + } + + raw_spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); + } + + cpuif->used_lrs = 0; +} + +/* + * Populates the particular LR with the state of a given IRQ: + * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq + * - for a level sensitive IRQ the pending state value is unchanged; + * it is dictated directly by the input level + * + * If @irq describes an SGI with multiple sources, we choose the + * lowest-numbered source VCPU and clear that bit in the source bitmap. + * + * The irq_lock must be held by the caller. + */ +void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) +{ + u32 val = irq->intid; + bool allow_pending = true; + + if (irq->active) { + val |= GICH_LR_ACTIVE_BIT; + if (vgic_irq_is_sgi(irq->intid)) + val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT; + if (vgic_irq_is_multi_sgi(irq)) { + allow_pending = false; + val |= GICH_LR_EOI; + } + } + + if (irq->group) + val |= GICH_LR_GROUP1; + + if (irq->hw) { + val |= GICH_LR_HW; + val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= GICH_LR_EOI; + + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that. + */ + if (irq->active) + allow_pending = false; + } + } + + if (allow_pending && irq_is_pending(irq)) { + val |= GICH_LR_PENDING_BIT; + + if (irq->config == VGIC_CONFIG_EDGE) + irq->pending_latch = false; + + if (vgic_irq_is_sgi(irq->intid)) { + u32 src = ffs(irq->source); + + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", + irq->intid)) + return; + + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; + irq->source &= ~(1 << (src - 1)); + if (irq->source) { + irq->pending_latch = true; + val |= GICH_LR_EOI; + } + } + } + + /* + * Level-triggered mapped IRQs are special because we only observe + * rising edges as input to the VGIC. We therefore lower the line + * level here, so that we can take new virtual IRQs. See + * vgic_v2_fold_lr_state for more info. + */ + if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) + irq->line_level = false; + + /* The GICv2 LR only holds five bits of priority. 
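+ * For example (illustrative arithmetic only), an 8-bit virtual priority of
+ * 0xa0 is stored in the LR as 0xa0 >> 3 == 0x14.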
*/ + val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val; +} + +void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = 0; +} + +void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u32 vmcr; + + vmcr = (vmcrp->grpen0 << GICH_VMCR_ENABLE_GRP0_SHIFT) & + GICH_VMCR_ENABLE_GRP0_MASK; + vmcr |= (vmcrp->grpen1 << GICH_VMCR_ENABLE_GRP1_SHIFT) & + GICH_VMCR_ENABLE_GRP1_MASK; + vmcr |= (vmcrp->ackctl << GICH_VMCR_ACK_CTL_SHIFT) & + GICH_VMCR_ACK_CTL_MASK; + vmcr |= (vmcrp->fiqen << GICH_VMCR_FIQ_EN_SHIFT) & + GICH_VMCR_FIQ_EN_MASK; + vmcr |= (vmcrp->cbpr << GICH_VMCR_CBPR_SHIFT) & + GICH_VMCR_CBPR_MASK; + vmcr |= (vmcrp->eoim << GICH_VMCR_EOI_MODE_SHIFT) & + GICH_VMCR_EOI_MODE_MASK; + vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & + GICH_VMCR_ALIAS_BINPOINT_MASK; + vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & + GICH_VMCR_BINPOINT_MASK; + vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) << + GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; + + cpu_if->vgic_vmcr = vmcr; +} + +void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u32 vmcr; + + vmcr = cpu_if->vgic_vmcr; + + vmcrp->grpen0 = (vmcr & GICH_VMCR_ENABLE_GRP0_MASK) >> + GICH_VMCR_ENABLE_GRP0_SHIFT; + vmcrp->grpen1 = (vmcr & GICH_VMCR_ENABLE_GRP1_MASK) >> + GICH_VMCR_ENABLE_GRP1_SHIFT; + vmcrp->ackctl = (vmcr & GICH_VMCR_ACK_CTL_MASK) >> + GICH_VMCR_ACK_CTL_SHIFT; + vmcrp->fiqen = (vmcr & GICH_VMCR_FIQ_EN_MASK) >> + GICH_VMCR_FIQ_EN_SHIFT; + vmcrp->cbpr = (vmcr & GICH_VMCR_CBPR_MASK) >> + GICH_VMCR_CBPR_SHIFT; + vmcrp->eoim = (vmcr & GICH_VMCR_EOI_MODE_MASK) >> + GICH_VMCR_EOI_MODE_SHIFT; + + vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> + GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> + GICH_VMCR_BINPOINT_SHIFT; + vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >> + GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT; +} + +void vgic_v2_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + + /* Get the show on the road... */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; +} + +/* check for overlapping regions and for regions crossing the end of memory */ +static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base) +{ + if (dist_base + KVM_VGIC_V2_DIST_SIZE < dist_base) + return false; + if (cpu_base + KVM_VGIC_V2_CPU_SIZE < cpu_base) + return false; + + if (dist_base + KVM_VGIC_V2_DIST_SIZE <= cpu_base) + return true; + if (cpu_base + KVM_VGIC_V2_CPU_SIZE <= dist_base) + return true; + + return false; +} + +int vgic_v2_map_resources(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int ret = 0; + + if (vgic_ready(kvm)) + goto out; + + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { + kvm_err("Need to set vgic cpu and dist addresses first\n"); + ret = -ENXIO; + goto out; + } + + if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) { + kvm_err("VGIC CPU and dist frames overlap\n"); + ret = -EINVAL; + goto out; + } + + /* + * Initialize the vgic if this hasn't already been done on demand by + * accessing the vgic state from userspace. 
+ */ + ret = vgic_init(kvm); + if (ret) { + kvm_err("Unable to initialize VGIC dynamic data structures\n"); + goto out; + } + + ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2); + if (ret) { + kvm_err("Unable to register VGIC MMIO regions\n"); + goto out; + } + + if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { + ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, + kvm_vgic_global_state.vcpu_base, + KVM_VGIC_V2_CPU_SIZE, true); + if (ret) { + kvm_err("Unable to remap VGIC CPU to VCPU\n"); + goto out; + } + } + + dist->ready = true; + +out: + return ret; +} + +DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap); + +/** + * vgic_v2_probe - probe for a VGICv2 compatible interrupt controller + * @info: pointer to the GIC description + * + * Returns 0 if the VGICv2 has been probed successfully, returns an error code + * otherwise + */ +int vgic_v2_probe(const struct gic_kvm_info *info) +{ + int ret; + u32 vtr; + + if (!info->vctrl.start) { + kvm_err("GICH not present in the firmware table\n"); + return -ENXIO; + } + + if (!PAGE_ALIGNED(info->vcpu.start) || + !PAGE_ALIGNED(resource_size(&info->vcpu))) { + kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n"); + + ret = create_hyp_io_mappings(info->vcpu.start, + resource_size(&info->vcpu), + &kvm_vgic_global_state.vcpu_base_va, + &kvm_vgic_global_state.vcpu_hyp_va); + if (ret) { + kvm_err("Cannot map GICV into hyp\n"); + goto out; + } + + static_branch_enable(&vgic_v2_cpuif_trap); + } + + ret = create_hyp_io_mappings(info->vctrl.start, + resource_size(&info->vctrl), + &kvm_vgic_global_state.vctrl_base, + &kvm_vgic_global_state.vctrl_hyp); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); + goto out; + } + + vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); + kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; + + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device\n"); + goto out; + } + + kvm_vgic_global_state.can_emulate_gicv2 = true; + kvm_vgic_global_state.vcpu_base = info->vcpu.start; + kvm_vgic_global_state.type = VGIC_V2; + kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; + + kvm_debug("vgic-v2@%llx\n", info->vctrl.start); + + return 0; +out: + if (kvm_vgic_global_state.vctrl_base) + iounmap(kvm_vgic_global_state.vctrl_base); + if (kvm_vgic_global_state.vcpu_base_va) + iounmap(kvm_vgic_global_state.vcpu_base_va); + + return ret; +} + +static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u64 used_lrs = cpu_if->used_lrs; + u64 elrsr; + int i; + + elrsr = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(used_lrs > 32)) + elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32; + + for (i = 0; i < used_lrs; i++) { + if (elrsr & (1UL << i)) + cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; + else + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); + + writel_relaxed(0, base + GICH_LR0 + (i * 4)); + } +} + +void vgic_v2_save_state(struct kvm_vcpu *vcpu) +{ + void __iomem *base = kvm_vgic_global_state.vctrl_base; + u64 used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs; + + if (!base) + return; + + if (used_lrs) { + save_lrs(vcpu, base); + writel_relaxed(0, base + GICH_HCR); + } +} + +void vgic_v2_restore_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + void __iomem *base = kvm_vgic_global_state.vctrl_base; + u64 used_lrs = cpu_if->used_lrs; + int i; + + if (!base) + return; + + if 
(used_lrs) { + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); + for (i = 0; i < used_lrs; i++) { + writel_relaxed(cpu_if->vgic_lr[i], + base + GICH_LR0 + (i * 4)); + } + } +} + +void vgic_v2_load(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + + writel_relaxed(cpu_if->vgic_vmcr, + kvm_vgic_global_state.vctrl_base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, + kvm_vgic_global_state.vctrl_base + GICH_APR); +} + +void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + + cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); +} + +void vgic_v2_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + + vgic_v2_vmcr_sync(vcpu); + cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); +} diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c new file mode 100644 index 000000000000..76e2d85789ed --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -0,0 +1,693 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_asm.h> + +#include "vgic.h" + +static bool group0_trap; +static bool group1_trap; +static bool common_trap; +static bool gicv4_enable; + +void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; + + cpuif->vgic_hcr |= ICH_HCR_UIE; +} + +static bool lr_signals_eoi_mi(u64 lr_val) +{ + return !(lr_val & ICH_LR_STATE) && (lr_val & ICH_LR_EOI) && + !(lr_val & ICH_LR_HW); +} + +void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + int lr; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + + cpuif->vgic_hcr &= ~ICH_HCR_UIE; + + for (lr = 0; lr < cpuif->used_lrs; lr++) { + u64 val = cpuif->vgic_lr[lr]; + u32 intid, cpuid; + struct vgic_irq *irq; + bool is_v2_sgi = false; + + cpuid = val & GICH_LR_PHYSID_CPUID; + cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V3) { + intid = val & ICH_LR_VIRTUAL_ID_MASK; + } else { + intid = val & GICH_LR_VIRTUALID; + is_v2_sgi = vgic_irq_is_sgi(intid); + } + + /* Notify fds when the guest EOI'ed a level-triggered IRQ */ + if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + if (!irq) /* An LPI could have been unmapped. */ + continue; + + raw_spin_lock(&irq->irq_lock); + + /* Always preserve the active bit */ + irq->active = !!(val & ICH_LR_ACTIVE_BIT); + + if (irq->active && is_v2_sgi) + irq->active_source = cpuid; + + /* Edge is the only case where we preserve the pending bit */ + if (irq->config == VGIC_CONFIG_EDGE && + (val & ICH_LR_PENDING_BIT)) { + irq->pending_latch = true; + + if (is_v2_sgi) + irq->source |= (1 << cpuid); + } + + /* + * Clear soft pending state when level irqs have been acked. + */ + if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) + irq->pending_latch = false; + + /* + * Level-triggered mapped IRQs are special because we only + * observe rising edges as input to the VGIC. 
+ * + * If the guest never acked the interrupt we have to sample + * the physical line and set the line level, because the + * device state could have changed or we simply need to + * process the still pending interrupt later. + * + * If this causes us to lower the level, we have to also clear + * the physical active state, since we will otherwise never be + * told when the interrupt becomes asserted again. + */ + if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) { + irq->line_level = vgic_get_phys_line_level(irq); + + if (!irq->line_level) + vgic_irq_set_phys_active(irq, false); + } + + raw_spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); + } + + cpuif->used_lrs = 0; +} + +/* Requires the irq to be locked already */ +void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) +{ + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u64 val = irq->intid; + bool allow_pending = true, is_v2_sgi; + + is_v2_sgi = (vgic_irq_is_sgi(irq->intid) && + model == KVM_DEV_TYPE_ARM_VGIC_V2); + + if (irq->active) { + val |= ICH_LR_ACTIVE_BIT; + if (is_v2_sgi) + val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT; + if (vgic_irq_is_multi_sgi(irq)) { + allow_pending = false; + val |= ICH_LR_EOI; + } + } + + if (irq->hw) { + val |= ICH_LR_HW; + val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= ICH_LR_EOI; + + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that. + */ + if (irq->active) + allow_pending = false; + } + } + + if (allow_pending && irq_is_pending(irq)) { + val |= ICH_LR_PENDING_BIT; + + if (irq->config == VGIC_CONFIG_EDGE) + irq->pending_latch = false; + + if (vgic_irq_is_sgi(irq->intid) && + model == KVM_DEV_TYPE_ARM_VGIC_V2) { + u32 src = ffs(irq->source); + + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", + irq->intid)) + return; + + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; + irq->source &= ~(1 << (src - 1)); + if (irq->source) { + irq->pending_latch = true; + val |= ICH_LR_EOI; + } + } + } + + /* + * Level-triggered mapped IRQs are special because we only observe + * rising edges as input to the VGIC. We therefore lower the line + * level here, so that we can take new virtual IRQs. See + * vgic_v3_fold_lr_state for more info. + */ + if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) + irq->line_level = false; + + if (irq->group) + val |= ICH_LR_GROUP; + + val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT; + + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val; +} + +void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0; +} + +void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u32 vmcr; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { + vmcr = (vmcrp->ackctl << ICH_VMCR_ACK_CTL_SHIFT) & + ICH_VMCR_ACK_CTL_MASK; + vmcr |= (vmcrp->fiqen << ICH_VMCR_FIQ_EN_SHIFT) & + ICH_VMCR_FIQ_EN_MASK; + } else { + /* + * When emulating GICv3 on GICv3 with SRE=1 on the + * VFIQEn bit is RES1 and the VAckCtl bit is RES0. 
+ */ + vmcr = ICH_VMCR_FIQ_EN_MASK; + } + + vmcr |= (vmcrp->cbpr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK; + vmcr |= (vmcrp->eoim << ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK; + vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; + vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; + vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; + vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK; + vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK; + + cpu_if->vgic_vmcr = vmcr; +} + +void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u32 vmcr; + + vmcr = cpu_if->vgic_vmcr; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { + vmcrp->ackctl = (vmcr & ICH_VMCR_ACK_CTL_MASK) >> + ICH_VMCR_ACK_CTL_SHIFT; + vmcrp->fiqen = (vmcr & ICH_VMCR_FIQ_EN_MASK) >> + ICH_VMCR_FIQ_EN_SHIFT; + } else { + /* + * When emulating GICv3 on GICv3 with SRE=1 on the + * VFIQEn bit is RES1 and the VAckCtl bit is RES0. + */ + vmcrp->fiqen = 1; + vmcrp->ackctl = 0; + } + + vmcrp->cbpr = (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; + vmcrp->eoim = (vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT; + vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; + vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT; + vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT; +} + +#define INITIAL_PENDBASER_VALUE \ + (GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)) + +void vgic_v3_enable(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; + + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vgic_v3->vgic_vmcr = 0; + + /* + * If we are emulating a GICv3, we do it in an non-GICv2-compatible + * way, so we force SRE to 1 to demonstrate this to the guest. + * Also, we don't support any form of IRQ/FIQ bypass. + * This goes with the spec allowing the value to be RAO/WI. + */ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB | + ICC_SRE_EL1_DFB | + ICC_SRE_EL1_SRE); + vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; + } else { + vgic_v3->vgic_sre = 0; + } + + vcpu->arch.vgic_cpu.num_id_bits = (kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_ID_BITS_MASK) >> + ICH_VTR_ID_BITS_SHIFT; + vcpu->arch.vgic_cpu.num_pri_bits = ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_PRI_BITS_MASK) >> + ICH_VTR_PRI_BITS_SHIFT) + 1; + + /* Get the show on the road... 
*/ + vgic_v3->vgic_hcr = ICH_HCR_EN; + if (group0_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TALL0; + if (group1_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TALL1; + if (common_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TC; +} + +int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) +{ + struct kvm_vcpu *vcpu; + int byte_offset, bit_nr; + gpa_t pendbase, ptr; + bool status; + u8 val; + int ret; + unsigned long flags; + +retry: + vcpu = irq->target_vcpu; + if (!vcpu) + return 0; + + pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + + byte_offset = irq->intid / BITS_PER_BYTE; + bit_nr = irq->intid % BITS_PER_BYTE; + ptr = pendbase + byte_offset; + + ret = kvm_read_guest_lock(kvm, ptr, &val, 1); + if (ret) + return ret; + + status = val & (1 << bit_nr); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->target_vcpu != vcpu) { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + goto retry; + } + irq->pending_latch = status; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + if (status) { + /* clear consumed data */ + val &= ~(1 << bit_nr); + ret = kvm_write_guest_lock(kvm, ptr, &val, 1); + if (ret) + return ret; + } + return 0; +} + +/** + * vgic_v3_save_pending_tables - Save the pending tables into guest RAM + * kvm lock and all vcpu lock must be held + */ +int vgic_v3_save_pending_tables(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + gpa_t last_ptr = ~(gpa_t)0; + int ret; + u8 val; + + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + int byte_offset, bit_nr; + struct kvm_vcpu *vcpu; + gpa_t pendbase, ptr; + bool stored; + + vcpu = irq->target_vcpu; + if (!vcpu) + continue; + + pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + + byte_offset = irq->intid / BITS_PER_BYTE; + bit_nr = irq->intid % BITS_PER_BYTE; + ptr = pendbase + byte_offset; + + if (ptr != last_ptr) { + ret = kvm_read_guest_lock(kvm, ptr, &val, 1); + if (ret) + return ret; + last_ptr = ptr; + } + + stored = val & (1U << bit_nr); + if (stored == irq->pending_latch) + continue; + + if (irq->pending_latch) + val |= 1 << bit_nr; + else + val &= ~(1 << bit_nr); + + ret = kvm_write_guest_lock(kvm, ptr, &val, 1); + if (ret) + return ret; + } + return 0; +} + +/** + * vgic_v3_rdist_overlap - check if a region overlaps with any + * existing redistributor region + * + * @kvm: kvm handle + * @base: base of the region + * @size: size of region + * + * Return: true if there is an overlap + */ +bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, &d->rd_regions, list) { + if ((base + size > rdreg->base) && + (base < rdreg->base + vgic_v3_rd_region_size(kvm, rdreg))) + return true; + } + return false; +} + +/* + * Check for overlapping regions and for regions crossing the end of memory + * for base addresses which have already been set. 
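+ *
+ * For instance (illustrative, assuming the usual 64 KiB distributor frame):
+ * a distributor base of 0xffffffffffff0000 would wrap past the end of the
+ * address space (base + size < base) and is rejected, as is any
+ * redistributor region overlapping [dist_base, dist_base + 64 KiB).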
+ */ +bool vgic_v3_check_base(struct kvm *kvm) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + + if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) && + d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base) + return false; + + list_for_each_entry(rdreg, &d->rd_regions, list) { + if (rdreg->base + vgic_v3_rd_region_size(kvm, rdreg) < + rdreg->base) + return false; + } + + if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base)) + return true; + + return !vgic_v3_rdist_overlap(kvm, d->vgic_dist_base, + KVM_VGIC_V3_DIST_SIZE); +} + +/** + * vgic_v3_rdist_free_slot - Look up registered rdist regions and identify one + * which has free space to put a new rdist region. + * + * @rd_regions: redistributor region list head + * + * A redistributor region maps n redistributors, n = region size / (2 x 64kB). + * Stride between redistributors is 0 and regions are filled in index order. + * + * Return: the redist region handle, if any, that has space to map a new rdist + * region. + */ +struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rd_regions) +{ + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, rd_regions, list) { + if (!vgic_v3_redist_region_full(rdreg)) + return rdreg; + } + return NULL; +} + +struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, + u32 index) +{ + struct list_head *rd_regions = &kvm->arch.vgic.rd_regions; + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, rd_regions, list) { + if (rdreg->index == index) + return rdreg; + } + return NULL; +} + + +int vgic_v3_map_resources(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int ret = 0; + int c; + + if (vgic_ready(kvm)) + goto out; + + kvm_for_each_vcpu(c, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) { + kvm_debug("vcpu %d redistributor base not set\n", c); + ret = -ENXIO; + goto out; + } + } + + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) { + kvm_err("Need to set vgic distributor addresses first\n"); + ret = -ENXIO; + goto out; + } + + if (!vgic_v3_check_base(kvm)) { + kvm_err("VGIC redist and dist frames overlap\n"); + ret = -EINVAL; + goto out; + } + + /* + * For a VGICv3 we require the userland to explicitly initialize + * the VGIC before we need to use it. 
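+ * (Userspace typically does that via the KVM_DEV_ARM_VGIC_GRP_CTRL group's
+ * KVM_DEV_ARM_VGIC_CTRL_INIT attribute; until then we bail out with -EBUSY
+ * below.)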
+ */ + if (!vgic_initialized(kvm)) { + ret = -EBUSY; + goto out; + } + + ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3); + if (ret) { + kvm_err("Unable to register VGICv3 dist MMIO regions\n"); + goto out; + } + + if (kvm_vgic_global_state.has_gicv4_1) + vgic_v4_configure_vsgis(kvm); + dist->ready = true; + +out: + return ret; +} + +DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); + +static int __init early_group0_trap_cfg(char *buf) +{ + return strtobool(buf, &group0_trap); +} +early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg); + +static int __init early_group1_trap_cfg(char *buf) +{ + return strtobool(buf, &group1_trap); +} +early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg); + +static int __init early_common_trap_cfg(char *buf) +{ + return strtobool(buf, &common_trap); +} +early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); + +static int __init early_gicv4_enable(char *buf) +{ + return strtobool(buf, &gicv4_enable); +} +early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); + +/** + * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller + * @info: pointer to the GIC description + * + * Returns 0 if the VGICv3 has been probed successfully, returns an error code + * otherwise + */ +int vgic_v3_probe(const struct gic_kvm_info *info) +{ + u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2); + int ret; + + /* + * The ListRegs field is 5 bits, but there is an architectural + * maximum of 16 list registers. Just ignore bit 4... + */ + kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; + kvm_vgic_global_state.can_emulate_gicv2 = false; + kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2; + + /* GICv4 support? */ + if (info->has_v4) { + kvm_vgic_global_state.has_gicv4 = gicv4_enable; + kvm_vgic_global_state.has_gicv4_1 = info->has_v4_1 && gicv4_enable; + kvm_info("GICv4%s support %sabled\n", + kvm_vgic_global_state.has_gicv4_1 ? ".1" : "", + gicv4_enable ? "en" : "dis"); + } + + if (!info->vcpu.start) { + kvm_info("GICv3: no GICV resource entry\n"); + kvm_vgic_global_state.vcpu_base = 0; + } else if (!PAGE_ALIGNED(info->vcpu.start)) { + pr_warn("GICV physical address 0x%llx not page aligned\n", + (unsigned long long)info->vcpu.start); + kvm_vgic_global_state.vcpu_base = 0; + } else { + kvm_vgic_global_state.vcpu_base = info->vcpu.start; + kvm_vgic_global_state.can_emulate_gicv2 = true; + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device.\n"); + return ret; + } + kvm_info("vgic-v2@%llx\n", info->vcpu.start); + } + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3); + if (ret) { + kvm_err("Cannot register GICv3 KVM device.\n"); + kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2); + return ret; + } + + if (kvm_vgic_global_state.vcpu_base == 0) + kvm_info("disabling GICv2 emulation\n"); + + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) { + group0_trap = true; + group1_trap = true; + } + + if (group0_trap || group1_trap || common_trap) { + kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n", + group0_trap ? "G0" : "", + group1_trap ? "G1" : "", + common_trap ? 
"C" : ""); + static_branch_enable(&vgic_v3_cpuif_trap); + } + + kvm_vgic_global_state.vctrl_base = NULL; + kvm_vgic_global_state.type = VGIC_V3; + kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; + + return 0; +} + +void vgic_v3_load(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + /* + * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen + * is dependent on ICC_SRE_EL1.SRE, and we have to perform the + * VMCR_EL2 save/restore in the world switch. + */ + if (likely(cpu_if->vgic_sre)) + kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); + + kvm_call_hyp(__vgic_v3_restore_aprs, kern_hyp_va(cpu_if)); + + if (has_vhe()) + __vgic_v3_activate_traps(cpu_if); + + WARN_ON(vgic_v4_load(vcpu)); +} + +void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + if (likely(cpu_if->vgic_sre)) + cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); +} + +void vgic_v3_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + WARN_ON(vgic_v4_put(vcpu, false)); + + vgic_v3_vmcr_sync(vcpu); + + kvm_call_hyp(__vgic_v3_save_aprs, kern_hyp_va(cpu_if)); + + if (has_vhe()) + __vgic_v3_deactivate_traps(cpu_if); +} diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c new file mode 100644 index 000000000000..27ac833e5ec7 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -0,0 +1,453 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/kvm_host.h> +#include <linux/irqchip/arm-gic-v3.h> + +#include "vgic.h" + +/* + * How KVM uses GICv4 (insert rude comments here): + * + * The vgic-v4 layer acts as a bridge between several entities: + * - The GICv4 ITS representation offered by the ITS driver + * - VFIO, which is in charge of the PCI endpoint + * - The virtual ITS, which is the only thing the guest sees + * + * The configuration of VLPIs is triggered by a callback from VFIO, + * instructing KVM that a PCI device has been configured to deliver + * MSIs to a vITS. + * + * kvm_vgic_v4_set_forwarding() is thus called with the routing entry, + * and this is used to find the corresponding vITS data structures + * (ITS instance, device, event and irq) using a process that is + * extremely similar to the injection of an MSI. + * + * At this stage, we can link the guest's view of an LPI (uniquely + * identified by the routing entry) and the host irq, using the GICv4 + * driver mapping operation. Should the mapping succeed, we've then + * successfully upgraded the guest's LPI to a VLPI. We can then start + * with updating GICv4's view of the property table and generating an + * INValidation in order to kickstart the delivery of this VLPI to the + * guest directly, without software intervention. Well, almost. + * + * When the PCI endpoint is deconfigured, this operation is reversed + * with VFIO calling kvm_vgic_v4_unset_forwarding(). + * + * Once the VLPI has been mapped, it needs to follow any change the + * guest performs on its LPI through the vITS. For that, a number of + * command handlers have hooks to communicate these changes to the HW: + * - Any invalidation triggers a call to its_prop_update_vlpi() + * - The INT command results in a irq_set_irqchip_state(), which + * generates an INT on the corresponding VLPI. 
+ * - The CLEAR command results in a irq_set_irqchip_state(), which + * generates an CLEAR on the corresponding VLPI. + * - DISCARD translates into an unmap, similar to a call to + * kvm_vgic_v4_unset_forwarding(). + * - MOVI is translated by an update of the existing mapping, changing + * the target vcpu, resulting in a VMOVI being generated. + * - MOVALL is translated by a string of mapping updates (similar to + * the handling of MOVI). MOVALL is horrible. + * + * Note that a DISCARD/MAPTI sequence emitted from the guest without + * reprogramming the PCI endpoint after MAPTI does not result in a + * VLPI being mapped, as there is no callback from VFIO (the guest + * will get the interrupt via the normal SW injection). Fixing this is + * not trivial, and requires some horrible messing with the VFIO + * internals. Not fun. Don't do that. + * + * Then there is the scheduling. Each time a vcpu is about to run on a + * physical CPU, KVM must tell the corresponding redistributor about + * it. And if we've migrated our vcpu from one CPU to another, we must + * tell the ITS (so that the messages reach the right redistributor). + * This is done in two steps: first issue a irq_set_affinity() on the + * irq corresponding to the vcpu, then call its_make_vpe_resident(). + * You must be in a non-preemptible context. On exit, a call to + * its_make_vpe_non_resident() tells the redistributor that we're done + * with the vcpu. + * + * Finally, the doorbell handling: Each vcpu is allocated an interrupt + * which will fire each time a VLPI is made pending whilst the vcpu is + * not running. Each time the vcpu gets blocked, the doorbell + * interrupt gets enabled. When the vcpu is unblocked (for whatever + * reason), the doorbell interrupt is disabled. + */ + +#define DB_IRQ_FLAGS (IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING) + +static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) +{ + struct kvm_vcpu *vcpu = info; + + /* We got the message, no need to fire again */ + if (!kvm_vgic_global_state.has_gicv4_1 && + !irqd_irq_disabled(&irq_to_desc(irq)->irq_data)) + disable_irq_nosync(irq); + + vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true; + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + + return IRQ_HANDLED; +} + +static void vgic_v4_sync_sgi_config(struct its_vpe *vpe, struct vgic_irq *irq) +{ + vpe->sgi_config[irq->intid].enabled = irq->enabled; + vpe->sgi_config[irq->intid].group = irq->group; + vpe->sgi_config[irq->intid].priority = irq->priority; +} + +static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + int i; + + /* + * With GICv4.1, every virtual SGI can be directly injected. So + * let's pretend that they are HW interrupts, tied to a host + * IRQ. The SGI code will do its magic. 
+ */ + for (i = 0; i < VGIC_NR_SGIS; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct irq_desc *desc; + unsigned long flags; + int ret; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw) + goto unlock; + + irq->hw = true; + irq->host_irq = irq_find_mapping(vpe->sgi_domain, i); + + /* Transfer the full irq state to the vPE */ + vgic_v4_sync_sgi_config(vpe, irq); + desc = irq_to_desc(irq->host_irq); + ret = irq_domain_activate_irq(irq_desc_get_irq_data(desc), + false); + if (!WARN_ON(ret)) { + /* Transfer pending state */ + ret = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + irq->pending_latch); + WARN_ON(ret); + irq->pending_latch = false; + } + unlock: + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu) +{ + int i; + + for (i = 0; i < VGIC_NR_SGIS; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct irq_desc *desc; + unsigned long flags; + int ret; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (!irq->hw) + goto unlock; + + irq->hw = false; + ret = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &irq->pending_latch); + WARN_ON(ret); + + desc = irq_to_desc(irq->host_irq); + irq_domain_deactivate_irq(irq_desc_get_irq_data(desc)); + unlock: + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +/* Must be called with the kvm lock held */ +void vgic_v4_configure_vsgis(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i; + + kvm_arm_halt_guest(kvm); + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (dist->nassgireq) + vgic_v4_enable_vsgis(vcpu); + else + vgic_v4_disable_vsgis(vcpu); + } + + kvm_arm_resume_guest(kvm); +} + +/** + * vgic_v4_init - Initialize the GICv4 data structures + * @kvm: Pointer to the VM being initialized + * + * We may be called each time a vITS is created, or when the + * vgic is initialized. This relies on kvm->lock to be + * held. In both cases, the number of vcpus should now be + * fixed. + */ +int vgic_v4_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i, nr_vcpus, ret; + + if (!kvm_vgic_global_state.has_gicv4) + return 0; /* Nothing to see here... move along. */ + + if (dist->its_vm.vpes) + return 0; + + nr_vcpus = atomic_read(&kvm->online_vcpus); + + dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes), + GFP_KERNEL); + if (!dist->its_vm.vpes) + return -ENOMEM; + + dist->its_vm.nr_vpes = nr_vcpus; + + kvm_for_each_vcpu(i, vcpu, kvm) + dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + ret = its_alloc_vcpu_irqs(&dist->its_vm); + if (ret < 0) { + kvm_err("VPE IRQ allocation failure\n"); + kfree(dist->its_vm.vpes); + dist->its_vm.nr_vpes = 0; + dist->its_vm.vpes = NULL; + return ret; + } + + kvm_for_each_vcpu(i, vcpu, kvm) { + int irq = dist->its_vm.vpes[i]->irq; + unsigned long irq_flags = DB_IRQ_FLAGS; + + /* + * Don't automatically enable the doorbell, as we're + * flipping it back and forth when the vcpu gets + * blocked. Also disable the lazy disabling, as the + * doorbell could kick us out of the guest too + * early... + * + * On GICv4.1, the doorbell is managed in HW and must + * be left enabled. 
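+ * (Concretely: on GICv4.1 the IRQ_NOAUTOEN bit is cleared from
+ * DB_IRQ_FLAGS below, leaving IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING,
+ * so the request_irq() that follows enables the doorbell right away.)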
+ */ + if (kvm_vgic_global_state.has_gicv4_1) + irq_flags &= ~IRQ_NOAUTOEN; + irq_set_status_flags(irq, irq_flags); + + ret = request_irq(irq, vgic_v4_doorbell_handler, + 0, "vcpu", vcpu); + if (ret) { + kvm_err("failed to allocate vcpu IRQ%d\n", irq); + /* + * Trick: adjust the number of vpes so we know + * how many to nuke on teardown... + */ + dist->its_vm.nr_vpes = i; + break; + } + } + + if (ret) + vgic_v4_teardown(kvm); + + return ret; +} + +/** + * vgic_v4_teardown - Free the GICv4 data structures + * @kvm: Pointer to the VM being destroyed + * + * Relies on kvm->lock to be held. + */ +void vgic_v4_teardown(struct kvm *kvm) +{ + struct its_vm *its_vm = &kvm->arch.vgic.its_vm; + int i; + + if (!its_vm->vpes) + return; + + for (i = 0; i < its_vm->nr_vpes; i++) { + struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i); + int irq = its_vm->vpes[i]->irq; + + irq_clear_status_flags(irq, DB_IRQ_FLAGS); + free_irq(irq, vcpu); + } + + its_free_vcpu_irqs(its_vm); + kfree(its_vm->vpes); + its_vm->nr_vpes = 0; + its_vm->vpes = NULL; +} + +int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident) + return 0; + + return its_make_vpe_non_resident(vpe, need_db); +} + +int vgic_v4_load(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + int err; + + if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident) + return 0; + + /* + * Before making the VPE resident, make sure the redistributor + * corresponding to our current CPU expects us here. See the + * doc in drivers/irqchip/irq-gic-v4.c to understand how this + * turns into a VMOVP command at the ITS level. + */ + err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id())); + if (err) + return err; + + err = its_make_vpe_resident(vpe, false, vcpu->kvm->arch.vgic.enabled); + if (err) + return err; + + /* + * Now that the VPE is resident, let's get rid of a potential + * doorbell interrupt that would still be pending. This is a + * GICv4.0 only "feature"... + */ + if (!kvm_vgic_global_state.has_gicv4_1) + err = irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false); + + return err; +} + +static struct vgic_its *vgic_get_its(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct kvm_msi msi = (struct kvm_msi) { + .address_lo = irq_entry->msi.address_lo, + .address_hi = irq_entry->msi.address_hi, + .data = irq_entry->msi.data, + .flags = irq_entry->msi.flags, + .devid = irq_entry->msi.devid, + }; + + return vgic_msi_to_its(kvm, &msi); +} + +int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct vgic_its *its; + struct vgic_irq *irq; + struct its_vlpi_map map; + int ret; + + if (!vgic_supports_direct_msis(kvm)) + return 0; + + /* + * Get the ITS, and escape early on error (not a valid + * doorbell for any of our vITSs). + */ + its = vgic_get_its(kvm, irq_entry); + if (IS_ERR(its)) + return 0; + + mutex_lock(&its->its_lock); + + /* Perform the actual DevID/EventID -> LPI translation. */ + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, + irq_entry->msi.data, &irq); + if (ret) + goto out; + + /* + * Emit the mapping request. If it fails, the ITS probably + * isn't v4 compatible, so let's silently bail out. Holding + * the ITS lock should ensure that nothing can modify the + * target vcpu. 
+ */ + map = (struct its_vlpi_map) { + .vm = &kvm->arch.vgic.its_vm, + .vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe, + .vintid = irq->intid, + .properties = ((irq->priority & 0xfc) | + (irq->enabled ? LPI_PROP_ENABLED : 0) | + LPI_PROP_GROUP1), + .db_enabled = true, + }; + + ret = its_map_vlpi(virq, &map); + if (ret) + goto out; + + irq->hw = true; + irq->host_irq = virq; + atomic_inc(&map.vpe->vlpi_count); + +out: + mutex_unlock(&its->its_lock); + return ret; +} + +int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct vgic_its *its; + struct vgic_irq *irq; + int ret; + + if (!vgic_supports_direct_msis(kvm)) + return 0; + + /* + * Get the ITS, and escape early on error (not a valid + * doorbell for any of our vITSs). + */ + its = vgic_get_its(kvm, irq_entry); + if (IS_ERR(its)) + return 0; + + mutex_lock(&its->its_lock); + + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, + irq_entry->msi.data, &irq); + if (ret) + goto out; + + WARN_ON(!(irq->hw && irq->host_irq == virq)); + if (irq->hw) { + atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count); + irq->hw = false; + ret = its_unmap_vlpi(virq); + } + +out: + mutex_unlock(&its->its_lock); + return ret; +} diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c new file mode 100644 index 000000000000..c3643b7f101b --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic.c @@ -0,0 +1,1020 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/list_sort.h> +#include <linux/nospec.h> + +#include <asm/kvm_hyp.h> + +#include "vgic.h" + +#define CREATE_TRACE_POINTS +#include "trace.h" + +struct vgic_global kvm_vgic_global_state __ro_after_init = { + .gicv3_cpuif = STATIC_KEY_FALSE_INIT, +}; + +/* + * Locking order is always: + * kvm->lock (mutex) + * its->cmd_lock (mutex) + * its->its_lock (mutex) + * vgic_cpu->ap_list_lock must be taken with IRQs disabled + * kvm->lpi_list_lock must be taken with IRQs disabled + * vgic_irq->irq_lock must be taken with IRQs disabled + * + * As the ap_list_lock might be taken from the timer interrupt handler, + * we have to disable IRQs before taking this lock and everything lower + * than it. + * + * If you need to take multiple locks, always take the upper lock first, + * then the lower ones, e.g. first take the its_lock, then the irq_lock. + * If you are already holding a lock and need to take a higher one, you + * have to drop the lower ranking lock first and re-aquire it after having + * taken the upper one. + * + * When taking more than one ap_list_lock at the same time, always take the + * lowest numbered VCPU's ap_list_lock first, so: + * vcpuX->vcpu_id < vcpuY->vcpu_id: + * raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock); + * raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock); + * + * Since the VGIC must support injecting virtual interrupts from ISRs, we have + * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer + * spinlocks for any lock that may be taken while injecting an interrupt. + */ + +/* + * Iterate over the VM's list of mapped LPIs to find the one with a + * matching interrupt ID and return a reference to the IRQ structure. 
+ */ +static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq = NULL; + unsigned long flags; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + if (irq->intid != intid) + continue; + + /* + * This increases the refcount, the caller is expected to + * call vgic_put_irq() later once it's finished with the IRQ. + */ + vgic_get_irq_kref(irq); + goto out_unlock; + } + irq = NULL; + +out_unlock: + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + return irq; +} + +/* + * This looks up the virtual interrupt ID to get the corresponding + * struct vgic_irq. It also increases the refcount, so any caller is expected + * to call vgic_put_irq() once it's finished with this IRQ. + */ +struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, + u32 intid) +{ + /* SGIs and PPIs */ + if (intid <= VGIC_MAX_PRIVATE) { + intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1); + return &vcpu->arch.vgic_cpu.private_irqs[intid]; + } + + /* SPIs */ + if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) { + intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS); + return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS]; + } + + /* LPIs */ + if (intid >= VGIC_MIN_LPI) + return vgic_get_lpi(kvm, intid); + + WARN(1, "Looking up struct vgic_irq for reserved INTID"); + return NULL; +} + +/* + * We can't do anything in here, because we lack the kvm pointer to + * lock and remove the item from the lpi_list. So we keep this function + * empty and use the return value of kref_put() to trigger the freeing. + */ +static void vgic_irq_release(struct kref *ref) +{ +} + +/* + * Drop the refcount on the LPI. Must be called with lpi_list_lock held. 
+ */ +void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + if (!kref_put(&irq->refcount, vgic_irq_release)) + return; + + list_del(&irq->lpi_list); + dist->lpi_list_count--; + + kfree(irq); +} + +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long flags; + + if (irq->intid < VGIC_MIN_LPI) + return; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + __vgic_put_lpi_locked(kvm, irq); + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); +} + +void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq, *tmp; + unsigned long flags; + + raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); + + list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { + if (irq->intid >= VGIC_MIN_LPI) { + raw_spin_lock(&irq->irq_lock); + list_del(&irq->ap_list); + irq->vcpu = NULL; + raw_spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); + } + } + + raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); +} + +void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending) +{ + WARN_ON(irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + pending)); +} + +bool vgic_get_phys_line_level(struct vgic_irq *irq) +{ + bool line_level; + + BUG_ON(!irq->hw); + + if (irq->get_input_level) + return irq->get_input_level(irq->intid); + + WARN_ON(irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &line_level)); + return line_level; +} + +/* Set/Clear the physical active state */ +void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active) +{ + + BUG_ON(!irq->hw); + WARN_ON(irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_ACTIVE, + active)); +} + +/** + * kvm_vgic_target_oracle - compute the target vcpu for an irq + * + * @irq: The irq to route. Must be already locked. + * + * Based on the current state of the interrupt (enabled, pending, + * active, vcpu and target_vcpu), compute the next vcpu this should be + * given to. Return NULL if this shouldn't be injected at all. + * + * Requires the IRQ lock to be held. + */ +static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) +{ + lockdep_assert_held(&irq->irq_lock); + + /* If the interrupt is active, it must stay on the current vcpu */ + if (irq->active) + return irq->vcpu ? : irq->target_vcpu; + + /* + * If the IRQ is not active but enabled and pending, we should direct + * it to its configured target VCPU. + * If the distributor is disabled, pending interrupts shouldn't be + * forwarded. + */ + if (irq->enabled && irq_is_pending(irq)) { + if (unlikely(irq->target_vcpu && + !irq->target_vcpu->kvm->arch.vgic.enabled)) + return NULL; + + return irq->target_vcpu; + } + + /* If neither active nor pending and enabled, then this IRQ should not + * be queued to any VCPU. + */ + return NULL; +} + +/* + * The order of items in the ap_lists defines how we'll pack things in LRs as + * well, the first items in the list being the first things populated in the + * LRs. + * + * A hard rule is that active interrupts can never be pushed out of the LRs + * (and therefore take priority) since we cannot reliably trap on deactivation + * of IRQs and therefore they have to be present in the LRs. + * + * Otherwise things should be sorted by the priority field and the GIC + * hardware support will take care of preemption of priority groups etc. 
+ * + * Return negative if "a" sorts before "b", 0 to preserve order, and positive + * to sort "b" before "a". + */ +static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list); + struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list); + bool penda, pendb; + int ret; + + /* + * list_sort may call this function with the same element when + * the list is fairly long. + */ + if (unlikely(irqa == irqb)) + return 0; + + raw_spin_lock(&irqa->irq_lock); + raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING); + + if (irqa->active || irqb->active) { + ret = (int)irqb->active - (int)irqa->active; + goto out; + } + + penda = irqa->enabled && irq_is_pending(irqa); + pendb = irqb->enabled && irq_is_pending(irqb); + + if (!penda || !pendb) { + ret = (int)pendb - (int)penda; + goto out; + } + + /* Both pending and enabled, sort by priority */ + ret = irqa->priority - irqb->priority; +out: + raw_spin_unlock(&irqb->irq_lock); + raw_spin_unlock(&irqa->irq_lock); + return ret; +} + +/* Must be called with the ap_list_lock held */ +static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + lockdep_assert_held(&vgic_cpu->ap_list_lock); + + list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp); +} + +/* + * Only valid injection if changing level for level-triggered IRQs or for a + * rising edge, and in-kernel connected IRQ lines can only be controlled by + * their owner. + */ +static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner) +{ + if (irq->owner != owner) + return false; + + switch (irq->config) { + case VGIC_CONFIG_LEVEL: + return irq->line_level != level; + case VGIC_CONFIG_EDGE: + return level; + } + + return false; +} + +/* + * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list. + * Do the queuing if necessary, taking the right locks in the right order. + * Returns true when the IRQ was queued, false otherwise. + * + * Needs to be entered with the IRQ lock already held, but will return + * with all locks dropped. + */ +bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags) +{ + struct kvm_vcpu *vcpu; + + lockdep_assert_held(&irq->irq_lock); + +retry: + vcpu = vgic_target_oracle(irq); + if (irq->vcpu || !vcpu) { + /* + * If this IRQ is already on a VCPU's ap_list, then it + * cannot be moved or modified and there is no more work for + * us to do. + * + * Otherwise, if the irq is not pending and enabled, it does + * not need to be inserted into an ap_list and there is also + * no more work for us to do. + */ + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + /* + * We have to kick the VCPU here, because we could be + * queueing an edge-triggered interrupt for which we + * get no EOI maintenance interrupt. In that case, + * while the IRQ is already on the VCPU's AP list, the + * VCPU could have EOI'ed the original interrupt and + * won't see this one until it exits for some other + * reason. + */ + if (vcpu) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + } + return false; + } + + /* + * We must unlock the irq lock to take the ap_list_lock where + * we are going to insert this new pending interrupt. 
+ */ + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + /* someone can do stuff here, which we re-check below */ + + raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags); + raw_spin_lock(&irq->irq_lock); + + /* + * Did something change behind our backs? + * + * There are two cases: + * 1) The irq lost its pending state or was disabled behind our + * backs and/or it was queued to another VCPU's ap_list. + * 2) Someone changed the affinity on this irq behind our + * backs and we are now holding the wrong ap_list_lock. + * + * In both cases, drop the locks and retry. + */ + + if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) { + raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, + flags); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + goto retry; + } + + /* + * Grab a reference to the irq to reflect the fact that it is + * now in the ap_list. + */ + vgic_get_irq_kref(irq); + list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); + irq->vcpu = vcpu; + + raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); + + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + + return true; +} + +/** + * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic + * @kvm: The VM structure pointer + * @cpuid: The CPU for PPIs + * @intid: The INTID to inject a new state to. + * @level: Edge-triggered: true: to trigger the interrupt + * false: to ignore the call + * Level-sensitive true: raise the input signal + * false: lower the input signal + * @owner: The opaque pointer to the owner of the IRQ being raised to verify + * that the caller is allowed to inject this IRQ. Userspace + * injections will have owner == NULL. + * + * The VGIC is not concerned with devices being active-LOW or active-HIGH for + * level-sensitive interrupts. You can think of the level parameter as 1 + * being HIGH and 0 being LOW and all devices being active-HIGH. + */ +int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, + bool level, void *owner) +{ + struct kvm_vcpu *vcpu; + struct vgic_irq *irq; + unsigned long flags; + int ret; + + trace_vgic_update_irq_pending(cpuid, intid, level); + + ret = vgic_lazy_init(kvm); + if (ret) + return ret; + + vcpu = kvm_get_vcpu(kvm, cpuid); + if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS) + return -EINVAL; + + irq = vgic_get_irq(kvm, vcpu, intid); + if (!irq) + return -EINVAL; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (!vgic_validate_injection(irq, level, owner)) { + /* Nothing to see here, move along... 
*/ + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(kvm, irq); + return 0; + } + + if (irq->config == VGIC_CONFIG_LEVEL) + irq->line_level = level; + else + irq->pending_latch = true; + + vgic_queue_irq_unlock(kvm, irq, flags); + vgic_put_irq(kvm, irq); + + return 0; +} + +/* @irq->irq_lock must be held */ +static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + unsigned int host_irq, + bool (*get_input_level)(int vindid)) +{ + struct irq_desc *desc; + struct irq_data *data; + + /* + * Find the physical IRQ number corresponding to @host_irq + */ + desc = irq_to_desc(host_irq); + if (!desc) { + kvm_err("%s: no interrupt descriptor\n", __func__); + return -EINVAL; + } + data = irq_desc_get_irq_data(desc); + while (data->parent_data) + data = data->parent_data; + + irq->hw = true; + irq->host_irq = host_irq; + irq->hwintid = data->hwirq; + irq->get_input_level = get_input_level; + return 0; +} + +/* @irq->irq_lock must be held */ +static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) +{ + irq->hw = false; + irq->hwintid = 0; + irq->get_input_level = NULL; +} + +int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, + u32 vintid, bool (*get_input_level)(int vindid)) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + int ret; + + BUG_ON(!irq); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + return ret; +} + +/** + * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ + * @vcpu: The VCPU pointer + * @vintid: The INTID of the interrupt + * + * Reset the active and pending states of a mapped interrupt. Kernel + * subsystems injecting mapped interrupts should reset their interrupt lines + * when we are doing a reset of the VM. + */ +void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + + if (!irq->hw) + goto out; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->active = false; + irq->pending_latch = false; + irq->line_level = false; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); +out: + vgic_put_irq(vcpu->kvm, irq); +} + +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) +{ + struct vgic_irq *irq; + unsigned long flags; + + if (!vgic_initialized(vcpu->kvm)) + return -EAGAIN; + + irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + BUG_ON(!irq); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + kvm_vgic_unmap_irq(irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + return 0; +} + +/** + * kvm_vgic_set_owner - Set the owner of an interrupt for a VM + * + * @vcpu: Pointer to the VCPU (used for PPIs) + * @intid: The virtual INTID identifying the interrupt (PPI or SPI) + * @owner: Opaque pointer to the owner + * + * Returns 0 if intid is not already used by another in-kernel device and the + * owner is set, otherwise returns an error code. 
+ */ +int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) +{ + struct vgic_irq *irq; + unsigned long flags; + int ret = 0; + + if (!vgic_initialized(vcpu->kvm)) + return -EAGAIN; + + /* SGIs and LPIs cannot be wired up to any device */ + if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid)) + return -EINVAL; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->owner && irq->owner != owner) + ret = -EEXIST; + else + irq->owner = owner; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + return ret; +} + +/** + * vgic_prune_ap_list - Remove non-relevant interrupts from the list + * + * @vcpu: The VCPU pointer + * + * Go over the list of "interesting" interrupts, and prune those that we + * won't have to consider in the near future. + */ +static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq, *tmp; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + +retry: + raw_spin_lock(&vgic_cpu->ap_list_lock); + + list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { + struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB; + bool target_vcpu_needs_kick = false; + + raw_spin_lock(&irq->irq_lock); + + BUG_ON(vcpu != irq->vcpu); + + target_vcpu = vgic_target_oracle(irq); + + if (!target_vcpu) { + /* + * We don't need to process this interrupt any + * further, move it off the list. + */ + list_del(&irq->ap_list); + irq->vcpu = NULL; + raw_spin_unlock(&irq->irq_lock); + + /* + * This vgic_put_irq call matches the + * vgic_get_irq_kref in vgic_queue_irq_unlock, + * where we added the LPI to the ap_list. As + * we remove the irq from the list, we drop + * also drop the refcount. + */ + vgic_put_irq(vcpu->kvm, irq); + continue; + } + + if (target_vcpu == vcpu) { + /* We're on the right CPU */ + raw_spin_unlock(&irq->irq_lock); + continue; + } + + /* This interrupt looks like it has to be migrated. */ + + raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock(&vgic_cpu->ap_list_lock); + + /* + * Ensure locking order by always locking the smallest + * ID first. + */ + if (vcpu->vcpu_id < target_vcpu->vcpu_id) { + vcpuA = vcpu; + vcpuB = target_vcpu; + } else { + vcpuA = target_vcpu; + vcpuB = vcpu; + } + + raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock); + raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock, + SINGLE_DEPTH_NESTING); + raw_spin_lock(&irq->irq_lock); + + /* + * If the affinity has been preserved, move the + * interrupt around. Otherwise, it means things have + * changed while the interrupt was unlocked, and we + * need to replay this. + * + * In all cases, we cannot trust the list not to have + * changed, so we restart from the beginning. 
+ */ + if (target_vcpu == vgic_target_oracle(irq)) { + struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu; + + list_del(&irq->ap_list); + irq->vcpu = target_vcpu; + list_add_tail(&irq->ap_list, &new_cpu->ap_list_head); + target_vcpu_needs_kick = true; + } + + raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); + raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock); + + if (target_vcpu_needs_kick) { + kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu); + kvm_vcpu_kick(target_vcpu); + } + + goto retry; + } + + raw_spin_unlock(&vgic_cpu->ap_list_lock); +} + +static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_fold_lr_state(vcpu); + else + vgic_v3_fold_lr_state(vcpu); +} + +/* Requires the irq_lock to be held. */ +static inline void vgic_populate_lr(struct kvm_vcpu *vcpu, + struct vgic_irq *irq, int lr) +{ + lockdep_assert_held(&irq->irq_lock); + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_populate_lr(vcpu, irq, lr); + else + vgic_v3_populate_lr(vcpu, irq, lr); +} + +static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_clear_lr(vcpu, lr); + else + vgic_v3_clear_lr(vcpu, lr); +} + +static inline void vgic_set_underflow(struct kvm_vcpu *vcpu) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_set_underflow(vcpu); + else + vgic_v3_set_underflow(vcpu); +} + +/* Requires the ap_list_lock to be held. */ +static int compute_ap_list_depth(struct kvm_vcpu *vcpu, + bool *multi_sgi) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq; + int count = 0; + + *multi_sgi = false; + + lockdep_assert_held(&vgic_cpu->ap_list_lock); + + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + int w; + + raw_spin_lock(&irq->irq_lock); + /* GICv2 SGIs can count for more than one... */ + w = vgic_irq_get_lr_count(irq); + raw_spin_unlock(&irq->irq_lock); + + count += w; + *multi_sgi |= (w > 1); + } + return count; +} + +/* Requires the VCPU's ap_list_lock to be held. */ +static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq; + int count; + bool multi_sgi; + u8 prio = 0xff; + int i = 0; + + lockdep_assert_held(&vgic_cpu->ap_list_lock); + + count = compute_ap_list_depth(vcpu, &multi_sgi); + if (count > kvm_vgic_global_state.nr_lr || multi_sgi) + vgic_sort_ap_list(vcpu); + + count = 0; + + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + raw_spin_lock(&irq->irq_lock); + + /* + * If we have multi-SGIs in the pipeline, we need to + * guarantee that they are all seen before any IRQ of + * lower priority. In that case, we need to filter out + * these interrupts by exiting early. This is easy as + * the AP list has been sorted already. 
+ */ + if (multi_sgi && irq->priority > prio) { + _raw_spin_unlock(&irq->irq_lock); + break; + } + + if (likely(vgic_target_oracle(irq) == vcpu)) { + vgic_populate_lr(vcpu, irq, count++); + + if (irq->source) + prio = irq->priority; + } + + raw_spin_unlock(&irq->irq_lock); + + if (count == kvm_vgic_global_state.nr_lr) { + if (!list_is_last(&irq->ap_list, + &vgic_cpu->ap_list_head)) + vgic_set_underflow(vcpu); + break; + } + } + + /* Nuke remaining LRs */ + for (i = count ; i < kvm_vgic_global_state.nr_lr; i++) + vgic_clear_lr(vcpu, i); + + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count; + else + vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count; +} + +static inline bool can_access_vgic_from_kernel(void) +{ + /* + * GICv2 can always be accessed from the kernel because it is + * memory-mapped, and VHE systems can access GICv3 EL2 system + * registers. + */ + return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe(); +} + +static inline void vgic_save_state(struct kvm_vcpu *vcpu) +{ + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_save_state(vcpu); + else + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); +} + +/* Sync back the hardware VGIC state into our emulation after a guest's run. */ +void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) +{ + int used_lrs; + + /* An empty ap_list_head implies used_lrs == 0 */ + if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) + return; + + if (can_access_vgic_from_kernel()) + vgic_save_state(vcpu); + + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs; + else + used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; + + if (used_lrs) + vgic_fold_lr_state(vcpu); + vgic_prune_ap_list(vcpu); +} + +static inline void vgic_restore_state(struct kvm_vcpu *vcpu) +{ + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_restore_state(vcpu); + else + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); +} + +/* Flush our emulation state into the GIC hardware before entering the guest. */ +void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) +{ + /* + * If there are no virtual interrupts active or pending for this + * VCPU, then there is no work to do and we can bail out without + * taking any lock. There is a potential race with someone injecting + * interrupts to the VCPU, but it is a benign race as the VCPU will + * either observe the new interrupt before or after doing this check, + * and introducing additional synchronization mechanism doesn't change + * this. + * + * Note that we still need to go through the whole thing if anything + * can be directly injected (GICv4). 
+ */ + if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) && + !vgic_supports_direct_msis(vcpu->kvm)) + return; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + + if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) { + raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); + vgic_flush_lr_state(vcpu); + raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + } + + if (can_access_vgic_from_kernel()) + vgic_restore_state(vcpu); +} + +void kvm_vgic_load(struct kvm_vcpu *vcpu) +{ + if (unlikely(!vgic_initialized(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_load(vcpu); + else + vgic_v3_load(vcpu); +} + +void kvm_vgic_put(struct kvm_vcpu *vcpu) +{ + if (unlikely(!vgic_initialized(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_put(vcpu); + else + vgic_v3_put(vcpu); +} + +void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu) +{ + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_vmcr_sync(vcpu); + else + vgic_v3_vmcr_sync(vcpu); +} + +int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq; + bool pending = false; + unsigned long flags; + struct vgic_vmcr vmcr; + + if (!vcpu->kvm->arch.vgic.enabled) + return false; + + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last) + return true; + + vgic_get_vmcr(vcpu, &vmcr); + + raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); + + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + raw_spin_lock(&irq->irq_lock); + pending = irq_is_pending(irq) && irq->enabled && + !irq->active && + irq->priority < vmcr.pmr; + raw_spin_unlock(&irq->irq_lock); + + if (pending) + break; + } + + raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); + + return pending; +} + +void vgic_kick_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int c; + + /* + * We've injected an interrupt, time to find out who deserves + * a good kick... + */ + kvm_for_each_vcpu(c, vcpu, kvm) { + if (kvm_vgic_vcpu_pending_irq(vcpu)) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + } + } +} + +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) +{ + struct vgic_irq *irq; + bool map_is_active; + unsigned long flags; + + if (!vgic_initialized(vcpu->kvm)) + return false; + + irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + map_is_active = irq->hw && irq->active; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + return map_is_active; +} diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h new file mode 100644 index 000000000000..64fcd7511110 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic.h @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015, 2016 ARM Ltd. 
+ */ +#ifndef __KVM_ARM_VGIC_NEW_H__ +#define __KVM_ARM_VGIC_NEW_H__ + +#include <linux/irqchip/arm-gic-common.h> + +#define PRODUCT_ID_KVM 0x4b /* ASCII code K */ +#define IMPLEMENTER_ARM 0x43b + +#define VGIC_ADDR_UNDEF (-1) +#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) + +#define INTERRUPT_ID_BITS_SPIS 10 +#define INTERRUPT_ID_BITS_ITS 16 +#define VGIC_PRI_BITS 5 + +#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) + +#define VGIC_AFFINITY_0_SHIFT 0 +#define VGIC_AFFINITY_0_MASK (0xffUL << VGIC_AFFINITY_0_SHIFT) +#define VGIC_AFFINITY_1_SHIFT 8 +#define VGIC_AFFINITY_1_MASK (0xffUL << VGIC_AFFINITY_1_SHIFT) +#define VGIC_AFFINITY_2_SHIFT 16 +#define VGIC_AFFINITY_2_MASK (0xffUL << VGIC_AFFINITY_2_SHIFT) +#define VGIC_AFFINITY_3_SHIFT 24 +#define VGIC_AFFINITY_3_MASK (0xffUL << VGIC_AFFINITY_3_SHIFT) + +#define VGIC_AFFINITY_LEVEL(reg, level) \ + ((((reg) & VGIC_AFFINITY_## level ##_MASK) \ + >> VGIC_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) + +/* + * The Userspace encodes the affinity differently from the MPIDR, + * Below macro converts vgic userspace format to MPIDR reg format. + */ +#define VGIC_TO_MPIDR(val) (VGIC_AFFINITY_LEVEL(val, 0) | \ + VGIC_AFFINITY_LEVEL(val, 1) | \ + VGIC_AFFINITY_LEVEL(val, 2) | \ + VGIC_AFFINITY_LEVEL(val, 3)) + +/* + * As per Documentation/virt/kvm/devices/arm-vgic-v3.rst, + * below macros are defined for CPUREG encoding. + */ +#define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK 0x000000000000c000 +#define KVM_REG_ARM_VGIC_SYSREG_OP0_SHIFT 14 +#define KVM_REG_ARM_VGIC_SYSREG_OP1_MASK 0x0000000000003800 +#define KVM_REG_ARM_VGIC_SYSREG_OP1_SHIFT 11 +#define KVM_REG_ARM_VGIC_SYSREG_CRN_MASK 0x0000000000000780 +#define KVM_REG_ARM_VGIC_SYSREG_CRN_SHIFT 7 +#define KVM_REG_ARM_VGIC_SYSREG_CRM_MASK 0x0000000000000078 +#define KVM_REG_ARM_VGIC_SYSREG_CRM_SHIFT 3 +#define KVM_REG_ARM_VGIC_SYSREG_OP2_MASK 0x0000000000000007 +#define KVM_REG_ARM_VGIC_SYSREG_OP2_SHIFT 0 + +#define KVM_DEV_ARM_VGIC_SYSREG_MASK (KVM_REG_ARM_VGIC_SYSREG_OP0_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_OP1_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_CRN_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_OP2_MASK) + +/* + * As per Documentation/virt/kvm/devices/arm-vgic-its.rst, + * below macros are defined for ITS table entry encoding. 
+ */ +#define KVM_ITS_CTE_VALID_SHIFT 63 +#define KVM_ITS_CTE_VALID_MASK BIT_ULL(63) +#define KVM_ITS_CTE_RDBASE_SHIFT 16 +#define KVM_ITS_CTE_ICID_MASK GENMASK_ULL(15, 0) +#define KVM_ITS_ITE_NEXT_SHIFT 48 +#define KVM_ITS_ITE_PINTID_SHIFT 16 +#define KVM_ITS_ITE_PINTID_MASK GENMASK_ULL(47, 16) +#define KVM_ITS_ITE_ICID_MASK GENMASK_ULL(15, 0) +#define KVM_ITS_DTE_VALID_SHIFT 63 +#define KVM_ITS_DTE_VALID_MASK BIT_ULL(63) +#define KVM_ITS_DTE_NEXT_SHIFT 49 +#define KVM_ITS_DTE_NEXT_MASK GENMASK_ULL(62, 49) +#define KVM_ITS_DTE_ITTADDR_SHIFT 5 +#define KVM_ITS_DTE_ITTADDR_MASK GENMASK_ULL(48, 5) +#define KVM_ITS_DTE_SIZE_MASK GENMASK_ULL(4, 0) +#define KVM_ITS_L1E_VALID_MASK BIT_ULL(63) +/* we only support 64 kB translation table page size */ +#define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16) + +#define KVM_VGIC_V3_RDIST_INDEX_MASK GENMASK_ULL(11, 0) +#define KVM_VGIC_V3_RDIST_FLAGS_MASK GENMASK_ULL(15, 12) +#define KVM_VGIC_V3_RDIST_FLAGS_SHIFT 12 +#define KVM_VGIC_V3_RDIST_BASE_MASK GENMASK_ULL(51, 16) +#define KVM_VGIC_V3_RDIST_COUNT_MASK GENMASK_ULL(63, 52) +#define KVM_VGIC_V3_RDIST_COUNT_SHIFT 52 + +#ifdef CONFIG_DEBUG_SPINLOCK +#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p) +#else +#define DEBUG_SPINLOCK_BUG_ON(p) +#endif + +/* Requires the irq_lock to be held by the caller. */ +static inline bool irq_is_pending(struct vgic_irq *irq) +{ + if (irq->config == VGIC_CONFIG_EDGE) + return irq->pending_latch; + else + return irq->pending_latch || irq->line_level; +} + +static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq) +{ + return irq->config == VGIC_CONFIG_LEVEL && irq->hw; +} + +static inline int vgic_irq_get_lr_count(struct vgic_irq *irq) +{ + /* Account for the active state as an interrupt */ + if (vgic_irq_is_sgi(irq->intid) && irq->source) + return hweight8(irq->source) + irq->active; + + return irq_is_pending(irq) || irq->active; +} + +static inline bool vgic_irq_is_multi_sgi(struct vgic_irq *irq) +{ + return vgic_irq_get_lr_count(irq) > 1; +} + +/* + * This struct provides an intermediate representation of the fields contained + * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC + * state to userspace can generate either GICv2 or GICv3 CPU interface + * registers regardless of the hardware backed GIC used. 
+ */ +struct vgic_vmcr { + u32 grpen0; + u32 grpen1; + + u32 ackctl; + u32 fiqen; + u32 cbpr; + u32 eoim; + + u32 abpr; + u32 bpr; + u32 pmr; /* Priority mask field in the GICC_PMR and + * ICC_PMR_EL1 priority field format */ +}; + +struct vgic_reg_attr { + struct kvm_vcpu *vcpu; + gpa_t addr; +}; + +int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr); +int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr); +const struct vgic_register_region * +vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, + gpa_t addr, int len); +struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, + u32 intid); +void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq); +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); +bool vgic_get_phys_line_level(struct vgic_irq *irq); +void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); +void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); +bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags); +void vgic_kick_vcpus(struct kvm *kvm); + +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, + phys_addr_t addr, phys_addr_t alignment); + +void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); +void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); +void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); +void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); +void vgic_v2_set_npie(struct kvm_vcpu *vcpu); +int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); +int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v2_enable(struct kvm_vcpu *vcpu); +int vgic_v2_probe(const struct gic_kvm_info *info); +int vgic_v2_map_resources(struct kvm *kvm); +int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, + enum vgic_type); + +void vgic_v2_init_lrs(void); +void vgic_v2_load(struct kvm_vcpu *vcpu); +void vgic_v2_put(struct kvm_vcpu *vcpu); +void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); + +void vgic_v2_save_state(struct kvm_vcpu *vcpu); +void vgic_v2_restore_state(struct kvm_vcpu *vcpu); + +static inline void vgic_get_irq_kref(struct vgic_irq *irq) +{ + if (irq->intid < VGIC_MIN_LPI) + return; + + kref_get(&irq->refcount); +} + +void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); +void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); +void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); +void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); +void vgic_v3_set_npie(struct kvm_vcpu *vcpu); +void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v3_enable(struct kvm_vcpu *vcpu); +int vgic_v3_probe(const struct gic_kvm_info *info); +int vgic_v3_map_resources(struct kvm *kvm); +int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); +int vgic_v3_save_pending_tables(struct kvm *kvm); +int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count); +int vgic_register_redist_iodev(struct kvm_vcpu *vcpu); +bool vgic_v3_check_base(struct kvm *kvm); + +void vgic_v3_load(struct kvm_vcpu 
*vcpu); +void vgic_v3_put(struct kvm_vcpu *vcpu); +void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu); + +bool vgic_has_its(struct kvm *kvm); +int kvm_vgic_register_its_device(void); +void vgic_enable_lpis(struct kvm_vcpu *vcpu); +void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu); +int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); +int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); +int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, + u64 id, u64 *val); +int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, + u64 *reg); +int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, + u32 intid, u64 *val); +int kvm_register_vgic_device(unsigned long type); +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +int vgic_lazy_init(struct kvm *kvm); +int vgic_init(struct kvm *kvm); + +void vgic_debug_init(struct kvm *kvm); +void vgic_debug_destroy(struct kvm *kvm); + +bool lock_all_vcpus(struct kvm *kvm); +void unlock_all_vcpus(struct kvm *kvm); + +static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; + + /* + * num_pri_bits are initialized with HW supported values. + * We can rely safely on num_pri_bits even if VM has not + * restored ICC_CTLR_EL1 before restoring APnR registers. + */ + switch (cpu_if->num_pri_bits) { + case 7: return 3; + case 6: return 1; + default: return 0; + } +} + +static inline bool +vgic_v3_redist_region_full(struct vgic_redist_region *region) +{ + if (!region->count) + return false; + + return (region->free_index >= region->count); +} + +struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rdregs); + +static inline size_t +vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg) +{ + if (!rdreg->count) + return atomic_read(&kvm->online_vcpus) * KVM_VGIC_V3_REDIST_SIZE; + else + return rdreg->count * KVM_VGIC_V3_REDIST_SIZE; +} + +struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, + u32 index); + +bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size); + +static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size) +{ + struct vgic_dist *d = &kvm->arch.vgic; + + return (base + size > d->vgic_dist_base) && + (base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE); +} + +int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr); +int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid, struct vgic_irq **irq); +struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); +int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi); +void vgic_lpi_translation_cache_init(struct kvm *kvm); +void vgic_lpi_translation_cache_destroy(struct kvm *kvm); +void vgic_its_invalidate_cache(struct kvm *kvm); + +bool vgic_supports_direct_msis(struct kvm *kvm); +int vgic_v4_init(struct kvm *kvm); +void vgic_v4_teardown(struct kvm *kvm); +void vgic_v4_configure_vsgis(struct kvm *kvm); + +#endif diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c index 5d147a33d648..6e82e0be8040 100644 --- a/arch/ia64/lib/csum_partial_copy.c +++ b/arch/ia64/lib/csum_partial_copy.c @@ -12,7 +12,7 @@ 
#include <linux/types.h> #include <linux/string.h> -#include <linux/uaccess.h> +#include <net/checksum.h> /* * XXX Fixme: those 2 inlines are meant for debugging and will go away diff --git a/arch/mips/Kbuild b/arch/mips/Kbuild index a8d5e4fcbe53..d5d6ef9bb986 100644 --- a/arch/mips/Kbuild +++ b/arch/mips/Kbuild @@ -12,7 +12,7 @@ obj-y := $(platform-y) # make clean traverses $(obj-) without having included .config, so # everything ends up here -obj- := $(platform-) +obj- := $(platform-y) # mips object files # The object files are linked as core-y files would be linked diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index a69b272a3ab0..c7368a81fd1e 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -1,42 +1,44 @@ # SPDX-License-Identifier: GPL-2.0 # All platforms listed in alphabetic order -platforms += alchemy -platforms += ar7 -platforms += ath25 -platforms += ath79 -platforms += bcm47xx -platforms += bcm63xx -platforms += bmips -platforms += cavium-octeon -platforms += cobalt -platforms += dec -platforms += emma -platforms += generic -platforms += jazz -platforms += jz4740 -platforms += lantiq -platforms += lasat -platforms += loongson2ef -platforms += loongson32 -platforms += loongson64 -platforms += mti-malta -platforms += netlogic -platforms += paravirt -platforms += pic32 -platforms += pistachio -platforms += pmcs-msp71xx -platforms += pnx833x -platforms += ralink -platforms += rb532 -platforms += sgi-ip22 -platforms += sgi-ip27 -platforms += sgi-ip30 -platforms += sgi-ip32 -platforms += sibyte -platforms += sni -platforms += txx9 -platforms += vr41xx +platform-$(CONFIG_MIPS_ALCHEMY) += alchemy/ +platform-$(CONFIG_AR7) += ar7/ +platform-$(CONFIG_ATH25) += ath25/ +platform-$(CONFIG_ATH79) += ath79/ +platform-$(CONFIG_BCM47XX) += bcm47xx/ +platform-$(CONFIG_BCM63XX) += bcm63xx/ +platform-$(CONFIG_BMIPS_GENERIC) += bmips/ +platform-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon/ +platform-$(CONFIG_MIPS_COBALT) += cobalt/ +platform-$(CONFIG_MACH_DECSTATION) += dec/ +platform-$(CONFIG_MIPS_GENERIC) += generic/ +platform-$(CONFIG_MACH_JAZZ) += jazz/ +platform-$(CONFIG_MACH_INGENIC) += jz4740/ +platform-$(CONFIG_LANTIQ) += lantiq/ +platform-$(CONFIG_MACH_LOONGSON2EF) += loongson2ef/ +platform-$(CONFIG_MACH_LOONGSON32) += loongson32/ +platform-$(CONFIG_MACH_LOONGSON64) += loongson64/ +platform-$(CONFIG_MIPS_MALTA) += mti-malta/ +platform-$(CONFIG_NLM_COMMON) += netlogic/ +platform-$(CONFIG_MIPS_PARAVIRT) += paravirt/ +platform-$(CONFIG_PIC32MZDA) += pic32/ +platform-$(CONFIG_MACH_PISTACHIO) += pistachio/ +platform-$(CONFIG_SOC_PNX833X) += pnx833x/ +platform-$(CONFIG_RALINK) += ralink/ +platform-$(CONFIG_MIKROTIK_RB532) += rb532/ +platform-$(CONFIG_SGI_IP22) += sgi-ip22/ +platform-$(CONFIG_SGI_IP27) += sgi-ip27/ +platform-$(CONFIG_SGI_IP28) += sgi-ip22/ +platform-$(CONFIG_SGI_IP30) += sgi-ip30/ +platform-$(CONFIG_SGI_IP32) += sgi-ip32/ +platform-$(CONFIG_SIBYTE_BCM112X) += sibyte/ +platform-$(CONFIG_SIBYTE_SB1250) += sibyte/ +platform-$(CONFIG_SIBYTE_BCM1x55) += sibyte/ +platform-$(CONFIG_SIBYTE_BCM1x80) += sibyte/ +platform-$(CONFIG_SNI_RM) += sni/ +platform-$(CONFIG_MACH_TX39XX) += txx9/ +platform-$(CONFIG_MACH_TX49XX) += txx9/ +platform-$(CONFIG_MACH_VR41XX) += vr41xx/ # include the platform specific files -include $(patsubst %, $(srctree)/arch/mips/%/Platform, $(platforms)) +include $(patsubst %, $(srctree)/arch/mips/%/Platform, $(platform-y)) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 94a91b5b7759..9dc08ee3d6b9 100644 --- 
a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -91,6 +91,9 @@ config MIPS select SYSCTL_EXCEPTION_TRACE select VIRT_TO_BUS +config MIPS_FIXUP_BIGPHYS_ADDR + bool + menu "Machine selection" choice @@ -156,6 +159,7 @@ config MIPS_ALCHEMY select CSRC_R4K select IRQ_MIPS_CPU select DMA_MAYBE_COHERENT # Au1000,1500,1100 aren't, rest is + select MIPS_FIXUP_BIGPHYS_ADDR if PCI select SYS_HAS_CPU_MIPS32_R1 select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_APM_EMULATION @@ -426,23 +430,6 @@ config LANTIQ select ARCH_HAS_RESET_CONTROLLER select RESET_CONTROLLER -config LASAT - bool "LASAT Networks platforms" - select CEVT_R4K - select CRC32 - select CSRC_R4K - select DMA_NONCOHERENT - select SYS_HAS_EARLY_PRINTK - select HAVE_PCI - select IRQ_MIPS_CPU - select PCI_GT64XXX_PCI0 - select MIPS_NILE4 - select R5000_CPU_SCACHE - select SYS_HAS_CPU_R5000 - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_64BIT_KERNEL if BROKEN - select SYS_SUPPORTS_LITTLE_ENDIAN - config MACH_LOONGSON32 bool "Loongson 32-bit family of machines" select SYS_SUPPORTS_ZBOOT @@ -474,8 +461,10 @@ config MACH_LOONGSON64 select ISA select I8259 select IRQ_MIPS_CPU - select NR_CPUS_DEFAULT_4 + select NO_EXCEPT_FILL + select NR_CPUS_DEFAULT_64 select USE_GENERIC_EARLY_PRINTK_8250 + select PCI_DRIVERS_GENERIC select SYS_HAS_CPU_LOONGSON64 select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_SMP @@ -592,13 +581,6 @@ config MACH_PIC32 Microchip PIC32 is a family of general-purpose 32 bit MIPS core microcontrollers. -config NEC_MARKEINS - bool "NEC EMMA2RH Mark-eins board" - select SOC_EMMA2RH - select HAVE_PCI - help - This enables support for the NEC Electronics Mark-eins boards. - config MACH_VR41XX bool "NEC VR4100 series based machines" select CEVT_R4K @@ -620,30 +602,6 @@ config NXP_STB225 help Support for NXP Semiconductors STB225 Development Board. -config PMC_MSP - bool "PMC-Sierra MSP chipsets" - select CEVT_R4K - select CSRC_R4K - select DMA_NONCOHERENT - select SWAP_IO_SPACE - select NO_EXCEPT_FILL - select BOOT_RAW - select SYS_HAS_CPU_MIPS32_R1 - select SYS_HAS_CPU_MIPS32_R2 - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_MIPS16 - select IRQ_MIPS_CPU - select SERIAL_8250 - select SERIAL_8250_CONSOLE - select USB_EHCI_BIG_ENDIAN_MMIO - select USB_EHCI_BIG_ENDIAN_DESC - help - This adds support for the PMC-Sierra family of Multi-Service - Processor System-On-A-Chips. These parts include a number - of integrated peripherals, interfaces and DSPs in addition to - a variety of MIPS cores. 
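Editor's note on the MIPS_FIXUP_BIGPHYS_ADDR option introduced at the top of this hunk: the actual fixup lands further down in this series, in arch/mips/alchemy/common/setup.c, where io_remap_pfn_range() is provided out of line and runs the physical address through fixup_bigphys_addr() before calling remap_pfn_range(). The sketch below shows the kind of driver mmap() path that ends up in that helper. It is illustrative only and not part of the patch; the window base, size and function name are made up.

#include <linux/fs.h>
#include <linux/mm.h>

#define EXAMPLE_WIN_BASE	0x14000000UL	/* hypothetical MMIO window */
#define EXAMPLE_WIN_SIZE	0x1000UL

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	if (size > EXAMPLE_WIN_SIZE)
		return -EINVAL;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	/*
	 * On a MIPS_FIXUP_BIGPHYS_ADDR platform this call now goes through
	 * the out-of-line io_remap_pfn_range() added later in this diff,
	 * which applies fixup_bigphys_addr() before remap_pfn_range().
	 */
	return io_remap_pfn_range(vma, vma->vm_start,
				  EXAMPLE_WIN_BASE >> PAGE_SHIFT,
				  size, vma->vm_page_prot);
}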
- config RALINK bool "Ralink based machines" select CEVT_R4K @@ -1086,10 +1044,8 @@ source "arch/mips/generic/Kconfig" source "arch/mips/jazz/Kconfig" source "arch/mips/jz4740/Kconfig" source "arch/mips/lantiq/Kconfig" -source "arch/mips/lasat/Kconfig" source "arch/mips/pic32/Kconfig" source "arch/mips/pistachio/Kconfig" -source "arch/mips/pmcs-msp71xx/Kconfig" source "arch/mips/ralink/Kconfig" source "arch/mips/sgi-ip27/Kconfig" source "arch/mips/sibyte/Kconfig" @@ -1153,6 +1109,7 @@ config CSRC_IOASIC bool config CSRC_R4K + select CLOCKSOURCE_WATCHDOG if CPU_FREQ bool config CSRC_SB1250 @@ -1210,9 +1167,6 @@ config MIPS_BONITO64 config MIPS_MSC bool -config MIPS_NILE4 - bool - config SYNC_R4K bool @@ -1333,18 +1287,6 @@ config PCI_XTALK_BRIDGE config NO_EXCEPT_FILL bool -config SOC_EMMA2RH - bool - select CEVT_R4K - select CSRC_R4K - select DMA_NONCOHERENT - select IRQ_MIPS_CPU - select SWAP_IO_SPACE - select SYS_HAS_CPU_R5500 - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_64BIT_KERNEL - select SYS_SUPPORTS_BIG_ENDIAN - config SOC_PNX833X bool select CEVT_R4K @@ -1418,9 +1360,6 @@ config MIPS_L1_CACHE_SHIFT default "4" if MIPS_L1_CACHE_SHIFT_4 default "5" -config HAVE_STD_PC_SERIAL_PORT - bool - config ARC_CMDLINE_ONLY bool @@ -1503,6 +1442,18 @@ config CPU_LOONGSON3_WORKAROUNDS If unsure, please say Y. +config CPU_LOONGSON3_CPUCFG_EMULATION + bool "Emulate the CPUCFG instruction on older Loongson cores" + default y + depends on CPU_LOONGSON64 + help + Loongson-3A R4 and newer have the CPUCFG instruction available for + userland to query CPU capabilities, much like CPUID on x86. This + option provides emulation of the instruction on older Loongson + cores, back to Loongson-3A1000. + + If unsure, please say Y. + config CPU_LOONGSON2E bool "Loongson 2E" depends on SYS_HAS_CPU_LOONGSON2E @@ -1579,6 +1530,21 @@ config CPU_MIPS32_R2 specific type of processor in your system, choose those that one otherwise CPU_MIPS32_R1 is a safe bet for any MIPS32 system. +config CPU_MIPS32_R5 + bool "MIPS32 Release 5" + depends on SYS_HAS_CPU_MIPS32_R5 + select CPU_HAS_PREFETCH + select CPU_SUPPORTS_32BIT_KERNEL + select CPU_SUPPORTS_HIGHMEM + select CPU_SUPPORTS_MSA + select HAVE_KVM + select MIPS_O32_FP64_SUPPORT + help + Choose this option to build a kernel for release 5 or later of the + MIPS32 architecture. New MIPS processors, starting with the Warrior + family, are based on a MIPS32r5 processor. If you own an older + processor, you probably need to select MIPS32r1 or MIPS32r2 instead. + config CPU_MIPS32_R6 bool "MIPS32 Release 6" depends on SYS_HAS_CPU_MIPS32_R6 @@ -1631,6 +1597,23 @@ config CPU_MIPS64_R2 specific type of processor in your system, choose those that one otherwise CPU_MIPS64_R1 is a safe bet for any MIPS64 system. +config CPU_MIPS64_R5 + bool "MIPS64 Release 5" + depends on SYS_HAS_CPU_MIPS64_R5 + select CPU_HAS_PREFETCH + select CPU_SUPPORTS_32BIT_KERNEL + select CPU_SUPPORTS_64BIT_KERNEL + select CPU_SUPPORTS_HIGHMEM + select CPU_SUPPORTS_HUGEPAGES + select CPU_SUPPORTS_MSA + select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32 + select HAVE_KVM + help + Choose this option to build a kernel for release 5 or later of the + MIPS64 architecture. This is a intermediate MIPS architecture + release partly implementing release 6 features. Though there is no + any hardware known to be based on this release. + config CPU_MIPS64_R6 bool "MIPS64 Release 6" depends on SYS_HAS_CPU_MIPS64_R6 @@ -1649,6 +1632,28 @@ config CPU_MIPS64_R6 family, are based on a MIPS64r6 processor. 
If you own an older processor, you probably need to select MIPS64r1 or MIPS64r2 instead. +config CPU_P5600 + bool "MIPS Warrior P5600" + depends on SYS_HAS_CPU_P5600 + select CPU_HAS_PREFETCH + select CPU_SUPPORTS_32BIT_KERNEL + select CPU_SUPPORTS_HIGHMEM + select CPU_SUPPORTS_MSA + select CPU_SUPPORTS_UNCACHED_ACCELERATED + select CPU_SUPPORTS_CPUFREQ + select CPU_MIPSR2_IRQ_VI + select CPU_MIPSR2_IRQ_EI + select HAVE_KVM + select MIPS_O32_FP64_SUPPORT + help + Choose this option to build a kernel for MIPS Warrior P5600 CPU. + It's based on MIPS32r5 ISA with XPA, EVA, dual/quad issue exec pipes, + MMU with two-levels TLB, UCA, MSA, MDU core level features and system + level features like up to six P5600 calculation cores, CM2 with L2 + cache, IOCU/IOMMU (though might be unused depending on the system- + specific IP core configuration), GIC, CPC, virtualisation module, + eJTAG and PDtrace. + config CPU_R3000 bool "R3000" depends on SYS_HAS_CPU_R3000 @@ -1825,7 +1830,8 @@ endchoice config CPU_MIPS32_3_5_FEATURES bool "MIPS32 Release 3.5 Features" depends on SYS_HAS_CPU_MIPS32_R3_5 - depends on CPU_MIPS32_R2 || CPU_MIPS32_R6 + depends on CPU_MIPS32_R2 || CPU_MIPS32_R5 || CPU_MIPS32_R6 || \ + CPU_P5600 help Choose this option to build a kernel for release 2 or later of the MIPS32 architecture including features from the 3.5 release such as @@ -1845,7 +1851,7 @@ config CPU_MIPS32_3_5_EVA config CPU_MIPS32_R5_FEATURES bool "MIPS32 Release 5 Features" depends on SYS_HAS_CPU_MIPS32_R5 - depends on CPU_MIPS32_R2 + depends on CPU_MIPS32_R2 || CPU_MIPS32_R5 || CPU_P5600 help Choose this option to build a kernel for release 2 or later of the MIPS32 architecture including features from release 5 such as @@ -2000,6 +2006,10 @@ config SYS_HAS_CPU_MIPS64_R6 bool select ARCH_HAS_SYNC_DMA_FOR_CPU if DMA_NONCOHERENT +config SYS_HAS_CPU_P5600 + bool + select ARCH_HAS_SYNC_DMA_FOR_CPU if DMA_NONCOHERENT + config SYS_HAS_CPU_R3000 bool @@ -2083,11 +2093,13 @@ endmenu # config CPU_MIPS32 bool - default y if CPU_MIPS32_R1 || CPU_MIPS32_R2 || CPU_MIPS32_R6 + default y if CPU_MIPS32_R1 || CPU_MIPS32_R2 || CPU_MIPS32_R5 || \ + CPU_MIPS32_R6 || CPU_P5600 config CPU_MIPS64 bool - default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R6 + default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R5 || \ + CPU_MIPS64_R6 # # These indicate the revision of the architecture @@ -2103,6 +2115,13 @@ config CPU_MIPSR2 select CPU_HAS_DIEI if !CPU_DIEI_BROKEN select MIPS_SPRAM +config CPU_MIPSR5 + bool + default y if CPU_MIPS32_R5 || CPU_MIPS64_R5 || CPU_P5600 + select CPU_HAS_RIXI + select CPU_HAS_DIEI if !CPU_DIEI_BROKEN + select MIPS_SPRAM + config CPU_MIPSR6 bool default y if CPU_MIPS32_R6 || CPU_MIPS64_R6 @@ -2117,6 +2136,7 @@ config TARGET_ISA_REV int default 1 if CPU_MIPSR1 default 2 if CPU_MIPSR2 + default 5 if CPU_MIPSR5 default 6 if CPU_MIPSR6 default 0 help @@ -2706,7 +2726,11 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK config RELOCATABLE bool "Relocatable kernel" - depends on SYS_SUPPORTS_RELOCATABLE && (CPU_MIPS32_R2 || CPU_MIPS64_R2 || CPU_MIPS32_R6 || CPU_MIPS64_R6 || CAVIUM_OCTEON_SOC) + depends on SYS_SUPPORTS_RELOCATABLE + depends on CPU_MIPS32_R2 || CPU_MIPS64_R2 || \ + CPU_MIPS32_R5 || CPU_MIPS64_R5 || \ + CPU_MIPS32_R6 || CPU_MIPS64_R6 || \ + CPU_P5600 || CAVIUM_OCTEON_SOC help This builds a kernel image that retains relocation information so it can be loaded someplace besides the default 1MB. 
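A short note on the TARGET_ISA_REV change above, which now yields 5 for the new CPU_MIPS32_R5, CPU_MIPS64_R5 and CPU_P5600 targets: generic MIPS code usually keys compile-time decisions off the matching MIPS_ISA_REV macro from asm/isa-rev.h (derived from the compiler's -march setting rather than from Kconfig directly). The helper below is a made-up sketch of that pattern, not code from this patch.

#include <linux/printk.h>
#include <asm/isa-rev.h>

/* example_report_isa() is hypothetical; only the MIPS_ISA_REV guard matters. */
static void example_report_isa(void)
{
#if MIPS_ISA_REV >= 6
	pr_info("kernel built for MIPS r6\n");
#elif MIPS_ISA_REV >= 5
	/* Newly reachable with CPU_MIPS32_R5, CPU_MIPS64_R5 or CPU_P5600 */
	pr_info("kernel built for MIPS r5\n");
#elif MIPS_ISA_REV >= 1
	pr_info("kernel built for MIPS r%d\n", MIPS_ISA_REV);
#else
	pr_info("kernel built for a pre-r1 (legacy) MIPS ISA\n");
#endif
}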
@@ -3274,3 +3298,5 @@ endmenu source "drivers/firmware/Kconfig" source "arch/mips/kvm/Kconfig" + +source "arch/mips/vdso/Kconfig" diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index 93a2974d2ab7..7a8d94cdd493 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -148,4 +148,14 @@ config MIPS_CPS_NS16550_SHIFT form their addresses. That is, log base 2 of the span between adjacent ns16550 registers in the system. +config MIPS_CPS_NS16550_WIDTH + int "UART Register Width" + default 1 + help + ns16550 registers width. UART registers IO access methods will be + selected in accordance with this parameter. By setting it to 1, 2 or + 4 UART registers will be accessed by means of lb/sb, lh/sh or lw/sw + instructions respectively. Any value not from that set activates + lb/sb instructions. + endif # MIPS_CPS_NS16550_BOOL diff --git a/arch/mips/Makefile b/arch/mips/Makefile index e1c44aed8156..0d0f29d662c9 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -116,33 +116,8 @@ endif cflags-y += -ffreestanding -# -# We explicitly add the endianness specifier if needed, this allows -# to compile kernels with a toolchain for the other endianness. We -# carefully avoid to add it redundantly because gcc 3.3/3.4 complains -# when fed the toolchain default! -# -# Certain gcc versions up to gcc 4.1.1 (probably 4.2-subversion as of -# 2006-10-10 don't properly change the predefined symbols if -EB / -EL -# are used, so we kludge that here. A bug has been filed at -# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29413. -# -# clang doesn't suffer from these issues and our checks against -dumpmachine -# don't work so well when cross compiling, since without providing --target -# clang's output will be based upon the build machine. So for clang we simply -# unconditionally specify -EB or -EL as appropriate. 
-# -ifdef CONFIG_CC_IS_CLANG cflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -EL -else -undef-all += -UMIPSEB -U_MIPSEB -U__MIPSEB -U__MIPSEB__ -undef-all += -UMIPSEL -U_MIPSEL -U__MIPSEL -U__MIPSEL__ -predef-be += -DMIPSEB -D_MIPSEB -D__MIPSEB -D__MIPSEB__ -predef-le += -DMIPSEL -D_MIPSEL -D__MIPSEL -D__MIPSEL__ -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' && echo -EB $(undef-all) $(predef-be)) -cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' || echo -EL $(undef-all) $(predef-le)) -endif cflags-$(CONFIG_SB1XXX_CORELIS) += $(call cc-option,-mno-sched-prolog) \ -fno-omit-frame-pointer @@ -171,10 +146,13 @@ cflags-$(CONFIG_CPU_R4X00) += -march=r4600 -Wa,--trap cflags-$(CONFIG_CPU_TX49XX) += -march=r4600 -Wa,--trap cflags-$(CONFIG_CPU_MIPS32_R1) += -march=mips32 -Wa,--trap cflags-$(CONFIG_CPU_MIPS32_R2) += -march=mips32r2 -Wa,--trap +cflags-$(CONFIG_CPU_MIPS32_R5) += -march=mips32r5 -Wa,--trap -modd-spreg cflags-$(CONFIG_CPU_MIPS32_R6) += -march=mips32r6 -Wa,--trap -modd-spreg cflags-$(CONFIG_CPU_MIPS64_R1) += -march=mips64 -Wa,--trap cflags-$(CONFIG_CPU_MIPS64_R2) += -march=mips64r2 -Wa,--trap +cflags-$(CONFIG_CPU_MIPS64_R5) += -march=mips64r5 -Wa,--trap cflags-$(CONFIG_CPU_MIPS64_R6) += -march=mips64r6 -Wa,--trap +cflags-$(CONFIG_CPU_P5600) += -march=p5600 -Wa,--trap -modd-spreg cflags-$(CONFIG_CPU_R5000) += -march=r5000 -Wa,--trap cflags-$(CONFIG_CPU_R5500) += $(call cc-option,-march=r5500,-march=r5000) \ -Wa,--trap @@ -288,12 +266,23 @@ ifdef CONFIG_64BIT endif endif +# When linking a 32-bit executable the LLVM linker cannot cope with a +# 32-bit load address that has been sign-extended to 64 bits. Simply +# remove the upper 32 bits then, as it is safe to do so with other +# linkers. +ifdef CONFIG_64BIT + load-ld = $(load-y) +else + load-ld = $(subst 0xffffffff,0x,$(load-y)) +endif + KBUILD_AFLAGS += $(cflags-y) KBUILD_CFLAGS += $(cflags-y) -KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) +KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) -DLINKER_LOAD_ADDRESS=$(load-ld) KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0) bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) \ + LINKER_LOAD_ADDRESS=$(load-ld) \ VMLINUX_ENTRY_ADDRESS=$(entry-y) \ PLATFORM="$(platform-y)" \ ITS_INPUTS="$(its-y)" @@ -359,12 +348,6 @@ ifeq ($(shell expr $(zload-y) \< 0xffffffff80000000 2> /dev/null), 0) bootz-y += uzImage.bin endif -ifdef CONFIG_LASAT -rom.bin rom.sw: vmlinux - $(Q)$(MAKE) $(build)=arch/mips/lasat/image \ - $(bootvars-y) $@ -endif - # # Some machines like the Indy need 32-bit ELF binaries for booting purposes. 
# Other need ECOFF, so we build a 32-bit ELF binary for them which we then @@ -430,7 +413,6 @@ archclean: $(Q)$(MAKE) $(clean)=arch/mips/boot $(Q)$(MAKE) $(clean)=arch/mips/boot/compressed $(Q)$(MAKE) $(clean)=arch/mips/boot/tools - $(Q)$(MAKE) $(clean)=arch/mips/lasat archheaders: $(Q)$(MAKE) $(build)=arch/mips/kernel/syscalls all diff --git a/arch/mips/alchemy/Platform b/arch/mips/alchemy/Platform index 33c9da3b077b..c8cff50b0eda 100644 --- a/arch/mips/alchemy/Platform +++ b/arch/mips/alchemy/Platform @@ -15,19 +15,16 @@ load-$(CONFIG_MIPS_DB1XXX) += 0xffffffff80100000 # # 4G-Systems MTX-1 "MeshCube" wireless router # -platform-$(CONFIG_MIPS_MTX1) += alchemy/ load-$(CONFIG_MIPS_MTX1) += 0xffffffff80100000 # # MyCable eval board # -platform-$(CONFIG_MIPS_XXS1500) += alchemy/ load-$(CONFIG_MIPS_XXS1500) += 0xffffffff80100000 # # Trapeze ITS GRP board # -platform-$(CONFIG_MIPS_GPR) += alchemy/ load-$(CONFIG_MIPS_GPR) += 0xffffffff80100000 # boards can specify their own <gpio.h> in one of their include dirs. diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c index 7faaa6d593a7..0f60efe0481e 100644 --- a/arch/mips/alchemy/common/setup.c +++ b/arch/mips/alchemy/common/setup.c @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/ioport.h> +#include <linux/mm.h> #include <asm/dma-coherence.h> #include <asm/mipsregs.h> @@ -72,9 +73,9 @@ void __init plat_mem_setup(void) iomem_resource.end = IOMEM_RESOURCE_END; } -#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_PCI) +#ifdef CONFIG_MIPS_FIXUP_BIGPHYS_ADDR /* This routine should be valid for all Au1x based boards */ -phys_addr_t __fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) +phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) { unsigned long start = ALCHEMY_PCI_MEMWIN_START; unsigned long end = ALCHEMY_PCI_MEMWIN_END; @@ -90,5 +91,13 @@ phys_addr_t __fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) /* default nop */ return phys_addr; } -EXPORT_SYMBOL(__fixup_bigphys_addr); -#endif + +int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long vaddr, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + phys_addr_t phys_addr = fixup_bigphys_addr(pfn << PAGE_SHIFT, size); + + return remap_pfn_range(vma, vaddr, phys_addr >> PAGE_SHIFT, size, prot); +} +EXPORT_SYMBOL(io_remap_pfn_range); +#endif /* CONFIG_MIPS_FIXUP_BIGPHYS_ADDR */ diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c index 3e0c75c0ece0..752b93d91ac9 100644 --- a/arch/mips/alchemy/devboards/db1550.c +++ b/arch/mips/alchemy/devboards/db1550.c @@ -225,7 +225,7 @@ static void __init pb1550_nand_setup(void) case 0: case 2: case 8: case 0xC: case 0xD: /* x16 NAND Flash */ pb1550_nand_pd.devwidth = 1; - /* fallthrough */ + fallthrough; case 1: case 3: case 9: case 0xE: case 0xF: /* x8 NAND, already set up */ platform_device_register(&pb1550_nand_dev); diff --git a/arch/mips/ar7/Platform b/arch/mips/ar7/Platform index 21f9102d533c..a9257cc01c3c 100644 --- a/arch/mips/ar7/Platform +++ b/arch/mips/ar7/Platform @@ -1,6 +1,5 @@ # # Texas Instruments AR7 # -platform-$(CONFIG_AR7) += ar7/ cflags-$(CONFIG_AR7) += -I$(srctree)/arch/mips/include/asm/mach-ar7 load-$(CONFIG_AR7) += 0xffffffff94100000 diff --git a/arch/mips/ar7/setup.c b/arch/mips/ar7/setup.c index b3ffe7c898eb..352d5dbc777c 100644 --- a/arch/mips/ar7/setup.c +++ b/arch/mips/ar7/setup.c @@ -57,7 +57,7 @@ const char *get_system_type(void) case TITAN_CHIP_1060: return "TI AR7 (TNETV1060)"; } - /* fall 
through */ + fallthrough; default: return "TI AR7 (unknown)"; } diff --git a/arch/mips/ath25/Platform b/arch/mips/ath25/Platform index ef3f81fa080b..aef098b6f405 100644 --- a/arch/mips/ath25/Platform +++ b/arch/mips/ath25/Platform @@ -1,6 +1,5 @@ # # Atheros AR531X/AR231X WiSoC # -platform-$(CONFIG_ATH25) += ath25/ cflags-$(CONFIG_ATH25) += -I$(srctree)/arch/mips/include/asm/mach-ath25 load-$(CONFIG_ATH25) += 0xffffffff80041000 diff --git a/arch/mips/ath79/Platform b/arch/mips/ath79/Platform index 2bd663647d27..57744472ed2e 100644 --- a/arch/mips/ath79/Platform +++ b/arch/mips/ath79/Platform @@ -2,6 +2,5 @@ # Atheros AR71xx/AR724x/AR913x # -platform-$(CONFIG_ATH79) += ath79/ cflags-$(CONFIG_ATH79) += -I$(srctree)/arch/mips/include/asm/mach-ath79 load-$(CONFIG_ATH79) = 0xffffffff80060000 diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index acb4fd647a30..4b7c066ac88e 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -153,8 +153,7 @@ static void __init ath79_detect_sys_type(void) case REV_ID_MAJOR_QCA9533_V2: ver = 2; ath79_soc_rev = 2; - /* fall through */ - + fallthrough; case REV_ID_MAJOR_QCA9533: ath79_soc = ATH79_SOC_QCA9533; chip = "9533"; diff --git a/arch/mips/bcm47xx/Platform b/arch/mips/bcm47xx/Platform index 70783b75fd9d..833b204fe5da 100644 --- a/arch/mips/bcm47xx/Platform +++ b/arch/mips/bcm47xx/Platform @@ -1,7 +1,6 @@ # # Broadcom BCM47XX boards # -platform-$(CONFIG_BCM47XX) += bcm47xx/ cflags-$(CONFIG_BCM47XX) += \ -I$(srctree)/arch/mips/include/asm/mach-bcm47xx load-$(CONFIG_BCM47XX) := 0xffffffff80001000 diff --git a/arch/mips/bcm63xx/Platform b/arch/mips/bcm63xx/Platform index 5f86b2fff6de..882dc40f49a2 100644 --- a/arch/mips/bcm63xx/Platform +++ b/arch/mips/bcm63xx/Platform @@ -1,7 +1,6 @@ # # Broadcom BCM63XX boards # -platform-$(CONFIG_BCM63XX) += bcm63xx/ cflags-$(CONFIG_BCM63XX) += \ -I$(srctree)/arch/mips/include/asm/mach-bcm63xx/ load-$(CONFIG_BCM63XX) := 0xffffffff80010000 diff --git a/arch/mips/bcm63xx/cpu.c b/arch/mips/bcm63xx/cpu.c index f61c16f57a97..8e3e199dd35d 100644 --- a/arch/mips/bcm63xx/cpu.c +++ b/arch/mips/bcm63xx/cpu.c @@ -304,7 +304,7 @@ void __init bcm63xx_cpu_init(void) case CPU_BMIPS3300: if ((read_c0_prid() & PRID_IMP_MASK) != PRID_IMP_BMIPS3300_ALT) __cpu_name[cpu] = "Broadcom BCM6338"; - /* fall-through */ + fallthrough; case CPU_BMIPS32: chipid_reg = BCM_6345_PERF_BASE; break; diff --git a/arch/mips/bcm63xx/dev-flash.c b/arch/mips/bcm63xx/dev-flash.c index a1093934c616..f9cc015d3dc9 100644 --- a/arch/mips/bcm63xx/dev-flash.c +++ b/arch/mips/bcm63xx/dev-flash.c @@ -94,7 +94,7 @@ static int __init bcm63xx_detect_flash_type(void) case STRAPBUS_6368_BOOT_SEL_PARALLEL: return BCM63XX_FLASH_TYPE_PARALLEL; } - /* fall through */ + fallthrough; default: return -EINVAL; } diff --git a/arch/mips/bmips/Platform b/arch/mips/bmips/Platform index 5f127fd7f4b5..1434ea31ce85 100644 --- a/arch/mips/bmips/Platform +++ b/arch/mips/bmips/Platform @@ -1,7 +1,6 @@ # # Broadcom Generic BMIPS kernel # -platform-$(CONFIG_BMIPS_GENERIC) += bmips/ cflags-$(CONFIG_BMIPS_GENERIC) += \ -I$(srctree)/arch/mips/include/asm/mach-bmips/ load-$(CONFIG_BMIPS_GENERIC) := 0xffffffff80010000 diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 0df0ee8a298d..6e56caef69f0 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -90,7 +90,7 @@ ifneq ($(zload-y),) VMLINUZ_LOAD_ADDRESS := $(zload-y) else VMLINUZ_LOAD_ADDRESS = $(shell $(obj)/calc_vmlinuz_load_addr \ - 
$(obj)/vmlinux.bin $(VMLINUX_LOAD_ADDRESS)) + $(obj)/vmlinux.bin $(LINKER_LOAD_ADDRESS)) endif UIMAGE_LOADADDR = $(VMLINUZ_LOAD_ADDRESS) diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile index d429a69bfe30..19027129add8 100644 --- a/arch/mips/boot/dts/Makefile +++ b/arch/mips/boot/dts/Makefile @@ -1,17 +1,19 @@ # SPDX-License-Identifier: GPL-2.0 -subdir-y += brcm -subdir-y += cavium-octeon -subdir-y += img -subdir-y += ingenic -subdir-y += lantiq -subdir-y += loongson -subdir-y += mscc -subdir-y += mti -subdir-y += netlogic -subdir-y += ni -subdir-y += pic32 -subdir-y += qca -subdir-y += ralink -subdir-y += xilfpga +subdir-$(CONFIG_BMIPS_GENERIC) += brcm +subdir-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon +subdir-$(CONFIG_MACH_PISTACHIO) += img +subdir-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += img +subdir-$(CONFIG_MACH_INGENIC) += ingenic +subdir-$(CONFIG_LANTIQ) += lantiq +subdir-$(CONFIG_MACH_LOONGSON64) += loongson +subdir-$(CONFIG_MSCC_OCELOT) += mscc +subdir-$(CONFIG_MIPS_MALTA) += mti +subdir-$(CONFIG_LEGACY_BOARD_SEAD3) += mti +subdir-$(CONFIG_NLM_XLP_BOARD) += netlogic +subdir-$(CONFIG_FIT_IMAGE_FDT_NI169445) += ni +subdir-$(CONFIG_MACH_PIC32) += pic32 +subdir-$(CONFIG_ATH79) += qca +subdir-$(CONFIG_RALINK) += ralink +subdir-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += xilfpga obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix /, $(subdir-y)) diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index db0ca250bd1a..75f5bfbf2c37 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -386,6 +386,9 @@ interrupt-parent = <&gpe>; interrupts = <19 4>; + + nvmem-cells = <ð0_addr>; + nvmem-cell-names = "mac-address"; }; }; diff --git a/arch/mips/boot/dts/ingenic/gcw0.dts b/arch/mips/boot/dts/ingenic/gcw0.dts index f58d239c2058..8d22828787d8 100644 --- a/arch/mips/boot/dts/ingenic/gcw0.dts +++ b/arch/mips/boot/dts/ingenic/gcw0.dts @@ -4,6 +4,10 @@ #include "jz4770.dtsi" #include <dt-bindings/clock/ingenic,tcu.h> +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/iio/adc/ingenic,adc.h> +#include <dt-bindings/input/input.h> + / { compatible = "gcw,zero", "ingenic,jz4770"; model = "GCW Zero"; @@ -15,20 +19,370 @@ serial3 = &uart3; }; + memory: memory { + device_type = "memory"; + reg = <0x0 0x10000000>, + <0x30000000 0x10000000>; + }; + chosen { stdout-path = "serial2:57600n8"; }; - board { - compatible = "simple-bus"; + vcc: regulator@0 { + compatible = "regulator-fixed"; + regulator-name = "vcc"; + + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + mmc1_power: regulator@1 { + compatible = "regulator-fixed"; + regulator-name = "mmc1_vcc"; + gpio = <&gpe 9 0>; + + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + vin-supply = <&vcc>; + }; + + headphones_amp: analog-amplifier@0 { + compatible = "simple-audio-amplifier"; + enable-gpios = <&gpf 3 GPIO_ACTIVE_LOW>; + enable-delay-ms = <50>; + + VCC-supply = <&ldo5>; + sound-name-prefix = "Headphones Amp"; + }; + + speaker_amp: analog-amplifier@1 { + compatible = "simple-audio-amplifier"; + enable-gpios = <&gpf 20 GPIO_ACTIVE_HIGH>; + + VCC-supply = <&ldo5>; + sound-name-prefix = "Speaker Amp"; + }; + + sound { + compatible = "simple-audio-card"; + + simple-audio-card,name = "gcw0-audio"; + simple-audio-card,format = "i2s"; + + simple-audio-card,widgets = + "Speaker", "Speaker", + "Headphone", "Headphones", + "Line", "FM Radio", + "Microphone", "Built-in Mic"; + simple-audio-card,routing = 
+ "Headphones Amp INL", "LHPOUT", + "Headphones Amp INR", "RHPOUT", + "Headphones", "Headphones Amp OUTL", + "Headphones", "Headphones Amp OUTR", + "Speaker Amp INL", "LOUT", + "Speaker Amp INR", "ROUT", + "Speaker", "Speaker Amp OUTL", + "Speaker", "Speaker Amp OUTR", + "LLINEIN", "FM Radio", + "RLINEIN", "FM Radio", + "Built-in Mic", "MICBIAS", + "MIC1P", "Built-in Mic", + "MIC1N", "Built-in Mic"; + simple-audio-card,pin-switches = "Speaker", "Headphones"; + + simple-audio-card,hp-det-gpio = <&gpf 21 GPIO_ACTIVE_HIGH>; + simple-audio-card,aux-devs = <&speaker_amp>, <&headphones_amp>; + + simple-audio-card,bitclock-master = <&dai_codec>; + simple-audio-card,frame-master = <&dai_codec>; + + dai_cpu: simple-audio-card,cpu { + sound-dai = <&aic>; + }; + + dai_codec: simple-audio-card,codec { + sound-dai = <&codec>; + }; + }; + + rumble { + compatible = "pwm-vibrator"; + pwms = <&pwm 4 2000000 0>; + pwm-names = "enable"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_pwm4>; + }; + + backlight: backlight { + compatible = "pwm-backlight"; + pwms = <&pwm 1 40000 0>; + power-supply = <&vcc>; + + brightness-levels = <0 16 32 48 64 80 96 112 128 + 144 160 176 192 208 224 240 255>; + default-brightness-level = <12>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_pwm1>; + }; + + gpio-keys { + compatible = "gpio-keys"; + #address-cells = <1>; + #size-cells = <0>; + + autorepeat; + + button@0 { + label = "D-pad up"; + linux,code = <KEY_UP>; + linux,can-disable; + gpios = <&gpe 21 GPIO_ACTIVE_LOW>; + }; + + button@1 { + label = "D-pad down"; + linux,code = <KEY_DOWN>; + linux,can-disable; + gpios = <&gpe 25 GPIO_ACTIVE_LOW>; + }; + + button@2 { + label = "D-pad left"; + linux,code = <KEY_LEFT>; + linux,can-disable; + gpios = <&gpe 23 GPIO_ACTIVE_LOW>; + }; + + button@3 { + label = "D-pad right"; + linux,code = <KEY_RIGHT>; + linux,can-disable; + gpios = <&gpe 24 GPIO_ACTIVE_LOW>; + }; + + button@4 { + label = "Button A"; + linux,code = <KEY_LEFTCTRL>; + linux,can-disable; + gpios = <&gpe 29 GPIO_ACTIVE_LOW>; + }; + + button@5 { + label = "Button B"; + linux,code = <KEY_LEFTALT>; + linux,can-disable; + gpios = <&gpe 20 GPIO_ACTIVE_LOW>; + }; + + button@6 { + label = "Button Y"; + linux,code = <KEY_SPACE>; + linux,can-disable; + gpios = <&gpe 27 GPIO_ACTIVE_LOW>; + }; + + button@7 { + label = "Button X"; + linux,code = <KEY_LEFTSHIFT>; + linux,can-disable; + gpios = <&gpe 28 GPIO_ACTIVE_LOW>; + }; + + button@8 { + label = "Left shoulder button"; + linux,code = <KEY_TAB>; + linux,can-disable; + gpios = <&gpb 20 GPIO_ACTIVE_LOW>; + }; + + button@9 { + label = "Right shoulder button"; + linux,code = <KEY_BACKSPACE>; + linux,can-disable; + gpios = <&gpe 26 GPIO_ACTIVE_LOW>; + }; + + button@10 { + label = "Start button"; + linux,code = <KEY_ENTER>; + linux,can-disable; + gpios = <&gpb 21 GPIO_ACTIVE_LOW>; + }; + + button@11 { + label = "Select button"; + linux,code = <KEY_ESC>; + linux,can-disable; + /* + * This is the only button that is active high, + * since it doubles as BOOT_SEL1. 
+ */ + gpios = <&gpd 18 GPIO_ACTIVE_HIGH>; + }; + + button@12 { + label = "Power slider"; + linux,code = <KEY_POWER>; + linux,can-disable; + gpios = <&gpa 30 GPIO_ACTIVE_LOW>; + wakeup-source; + }; + + button@13 { + label = "Power hold"; + linux,code = <KEY_PAUSE>; + linux,can-disable; + gpios = <&gpf 11 GPIO_ACTIVE_LOW>; + }; + }; + + i2c3: i2c-controller@3 { + compatible = "i2c-gpio"; + #address-cells = <1>; + #size-cells = <0>; + + sda-gpios = <&gpd 5 GPIO_ACTIVE_HIGH>; + scl-gpios = <&gpd 4 GPIO_ACTIVE_HIGH>; + i2c-gpio,delay-us = <2>; /* 250 kHz */ + + act8600: pmic@5a { + compatible = "active-semi,act8600"; + reg = <0x5a>; + + regulators { + /* USB OTG */ + otg_vbus: SUDCDC_REG4 { + /* + * 5.3V instead of 5.0V to compensate + * for the voltage drop of a diode + * between the regulator and the + * connector. + */ + regulator-min-microvolt = <5300000>; + regulator-max-microvolt = <5300000>; + inl-supply = <&vcc>; + }; + + /* + * When this is off, there is no sound, but also + * no USB networking. + */ + ldo5: LDO5 { + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <2500000>; + inl-supply = <&vcc>; + }; + + /* LCD panel and FM radio */ + ldo6: LDO6 { + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + inl-supply = <&vcc>; + }; + + /* ??? */ + LDO7 { + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + /*regulator-always-on;*/ + inl-supply = <&vcc>; + }; + + /* + * The colors on the LCD are wrong when this is + * off. Which is strange, since the LCD panel + * data sheet only mentions a 3.3V input. + */ + LDO8 { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + inl-supply = <&vcc>; + }; + + /* RTC fixed 3.3V */ + LDO_REG9 { + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + inl-supply = <&vcc>; + }; + + /* Unused fixed 1.2V */ + LDO_REG10 { + inl-supply = <&vcc>; + }; + }; + }; + }; + + leds { + compatible = "gpio-leds"; + + led { + gpios = <&gpb 30 GPIO_ACTIVE_LOW>; + default-state = "on"; + }; + }; + + spi { + compatible = "spi-gpio"; #address-cells = <1>; - #size-cells = <1>; - ranges; + #size-cells = <0>; + + sck-gpios = <&gpe 15 GPIO_ACTIVE_HIGH>; + mosi-gpios = <&gpe 17 GPIO_ACTIVE_HIGH>; + cs-gpios = <&gpe 16 GPIO_ACTIVE_HIGH>; + num-chipselects = <1>; + + nt39016@0 { + compatible = "kingdisplay,kd035g6-54nt"; + reg = <0>; + + spi-max-frequency = <3125000>; + spi-3wire; + spi-cs-high; - otg_phy: otg-phy { - compatible = "usb-nop-xceiv"; - clocks = <&cgu JZ4770_CLK_OTG_PHY>; - clock-names = "main_clk"; + reset-gpios = <&gpe 2 GPIO_ACTIVE_LOW>; + + backlight = <&backlight>; + power-supply = <&ldo6>; + + port { + panel_input: endpoint { + remote-endpoint = <&panel_output>; + }; + }; + }; + }; + + connector { + compatible = "gpio-usb-b-connector", "usb-b-connector"; + label = "mini-USB"; + type = "mini"; + + /* + * USB OTG is not yet working reliably, the ID detection + * mechanism tends to fry easily for unknown reasons. + * Until this is fixed, disable OTG by not providing the + * ID GPIO to the driver. 
+ */ + //id-gpios = <&gpf 18 GPIO_ACTIVE_LOW>; + + vbus-gpios = <&gpb 5 GPIO_ACTIVE_HIGH>; + vbus-supply = <&otg_vbus>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_otg>; + + port { + usb_ep: endpoint { + remote-endpoint = <&usb_otg_ep>; + }; }; }; }; @@ -37,24 +391,86 @@ clock-frequency = <12000000>; }; +&pinctrl { + pins_lcd: lcd { + function = "lcd"; + groups = "lcd-24bit"; + }; + + pins_uart2: uart2 { + function = "uart2"; + groups = "uart2-data"; + }; + + pins_mmc0: mmc0 { + function = "mmc0"; + groups = "mmc0-1bit-a", "mmc0-4bit-a"; + }; + + pins_mmc1: mmc1 { + function = "mmc1"; + groups = "mmc1-1bit-d", "mmc1-4bit-d"; + }; + + pins_otg: otg { + otg-vbus-pin { + function = "otg"; + groups = "otg-vbus"; + }; + + vbus-pin { + pins = "PB5"; + bias-disable; + }; + }; + + pins_pwm1: pwm1 { + function = "pwm1"; + groups = "pwm1"; + }; + + pins_pwm4: pwm4 { + function = "pwm4"; + groups = "pwm4"; + }; +}; + &uart2 { + pinctrl-names = "default"; + pinctrl-0 = <&pins_uart2>; + status = "okay"; }; &cgu { - /* Put high-speed peripherals under PLL1, such that we can change the + /* + * Put high-speed peripherals under PLL1, such that we can change the * PLL0 frequency on demand without having to suspend peripherals. * We use a rate of 432 MHz, which is the least common multiple of * 27 MHz (required by TV encoder) and 48 MHz (required by USB host). + * Put the GPU under PLL0 since we want a higher frequency. + * Use the 32 kHz oscillator as the parent of the RTC for a higher + * precision. */ assigned-clocks = <&cgu JZ4770_CLK_PLL1>, - <&cgu JZ4770_CLK_UHC>; + <&cgu JZ4770_CLK_GPU>, + <&cgu JZ4770_CLK_RTC>, + <&cgu JZ4770_CLK_UHC>, + <&cgu JZ4770_CLK_LPCLK_MUX>, + <&cgu JZ4770_CLK_MMC0_MUX>, + <&cgu JZ4770_CLK_MMC1_MUX>; assigned-clock-parents = <0>, + <&cgu JZ4770_CLK_PLL0>, + <&cgu JZ4770_CLK_OSC32K>, + <&cgu JZ4770_CLK_PLL1>, + <&cgu JZ4770_CLK_PLL1>, + <&cgu JZ4770_CLK_PLL1>, <&cgu JZ4770_CLK_PLL1>; assigned-clock-rates = - <432000000>; + <432000000>, + <600000000>; }; &uhc { @@ -63,10 +479,69 @@ }; &tcu { - /* 750 kHz for the system timer and clocksource */ - assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER2>; - assigned-clock-rates = <750000>, <750000>; + /* + * 750 kHz for the system timer and clocksource, 12 MHz for the OST, + * and use RTC as the parent for the watchdog clock + */ + assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER2>, + <&tcu TCU_CLK_OST>, <&tcu TCU_CLK_WDT>; + assigned-clock-parents = <0>, <0>, <0>, <&cgu JZ4770_CLK_RTC>; + assigned-clock-rates = <750000>, <750000>, <12000000>; - /* PWM1 is in use, so reserve channel #2 for the clocksource */ + /* PWM1 is in use, so use channel #2 for the clocksource */ ingenic,pwm-channels-mask = <0xfa>; }; + +&usb_otg { + port { + usb_otg_ep: endpoint { + remote-endpoint = <&usb_ep>; + }; + }; +}; + +&otg_phy { + vcc-supply = <&ldo5>; +}; + +&rtc { + clocks = <&cgu JZ4770_CLK_RTC>; + clock-names = "rtc"; + + system-power-controller; +}; + +&mmc0 { + status = "okay"; + + bus-width = <4>; + max-frequency = <48000000>; + vmmc-supply = <&vcc>; + non-removable; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_mmc0>; +}; + +&mmc1 { + status = "okay"; + + bus-width = <4>; + max-frequency = <48000000>; + cd-gpios = <&gpb 2 GPIO_ACTIVE_LOW>; + vmmc-supply = <&mmc1_power>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_mmc1>; +}; + +&lcd { + pinctrl-names = "default"; + pinctrl-0 = <&pins_lcd>; + + port { + panel_output: endpoint { + remote-endpoint = <&panel_input>; + }; + }; +}; diff --git 
a/arch/mips/boot/dts/ingenic/gcw0_proto.dts b/arch/mips/boot/dts/ingenic/gcw0_proto.dts new file mode 100644 index 000000000000..02df22f8ae0f --- /dev/null +++ b/arch/mips/boot/dts/ingenic/gcw0_proto.dts @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/dts-v1/; + +#include "gcw0.dts" + +/ { + model = "GCW Zero Prototype"; +}; + +&memory { + /* Prototype has only 256 MiB of RAM */ + reg = <0x0 0x10000000>; +}; diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi index a3301bab9231..1520585c235c 100644 --- a/arch/mips/boot/dts/ingenic/jz4740.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi @@ -55,10 +55,10 @@ #clock-cells = <1>; - clocks = <&cgu JZ4740_CLK_RTC - &cgu JZ4740_CLK_EXT - &cgu JZ4740_CLK_PCLK - &cgu JZ4740_CLK_TCU>; + clocks = <&cgu JZ4740_CLK_RTC>, + <&cgu JZ4740_CLK_EXT>, + <&cgu JZ4740_CLK_PCLK>, + <&cgu JZ4740_CLK_TCU>; clock-names = "rtc", "ext", "pclk", "tcu"; interrupt-controller; @@ -74,6 +74,20 @@ clocks = <&tcu TCU_CLK_WDT>; clock-names = "wdt"; }; + + pwm: pwm@40 { + compatible = "ingenic,jz4740-pwm"; + reg = <0x40 0x80>; + + #pwm-cells = <3>; + + clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>, + <&tcu TCU_CLK_TIMER2>, <&tcu TCU_CLK_TIMER3>, + <&tcu TCU_CLK_TIMER4>, <&tcu TCU_CLK_TIMER5>, + <&tcu TCU_CLK_TIMER6>, <&tcu TCU_CLK_TIMER7>; + clock-names = "timer0", "timer1", "timer2", "timer3", + "timer4", "timer5", "timer6", "timer7"; + }; }; rtc_dev: rtc@10003000 { @@ -241,10 +255,10 @@ reg = <0x13010000 0x54>; #address-cells = <2>; #size-cells = <1>; - ranges = <1 0 0x18000000 0x4000000 - 2 0 0x14000000 0x4000000 - 3 0 0x0c000000 0x4000000 - 4 0 0x08000000 0x4000000>; + ranges = <1 0 0x18000000 0x4000000>, + <2 0 0x14000000 0x4000000>, + <3 0 0x0c000000 0x4000000>, + <4 0 0x08000000 0x4000000>; clocks = <&cgu JZ4740_CLK_MCLK>; }; @@ -258,8 +272,7 @@ dmac: dma-controller@13020000 { compatible = "ingenic,jz4740-dma"; - reg = <0x13020000 0xbc - 0x13020300 0x14>; + reg = <0x13020000 0xbc>, <0x13020300 0x14>; #dma-cells = <2>; interrupt-parent = <&intc>; diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi index 0bfb9edff3d0..fa11ac950499 100644 --- a/arch/mips/boot/dts/ingenic/jz4770.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <dt-bindings/clock/jz4770-cgu.h> +#include <dt-bindings/clock/ingenic,tcu.h> / { #address-cells = <1>; @@ -37,13 +38,25 @@ }; cgu: jz4770-cgu@10000000 { - compatible = "ingenic,jz4770-cgu"; + compatible = "ingenic,jz4770-cgu", "simple-mfd"; reg = <0x10000000 0x100>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x10000000 0x100>; clocks = <&ext>, <&osc32k>; clock-names = "ext", "osc32k"; #clock-cells = <1>; + + otg_phy: usb-phy@3c { + compatible = "ingenic,jz4770-phy"; + reg = <0x3c 0x10>; + + clocks = <&cgu JZ4770_CLK_OTG_PHY>; + + #phy-cells = <0>; + }; }; tcu: timer@10002000 { @@ -55,9 +68,9 @@ #clock-cells = <1>; - clocks = <&cgu JZ4770_CLK_RTC - &cgu JZ4770_CLK_EXT - &cgu JZ4770_CLK_PCLK>; + clocks = <&cgu JZ4770_CLK_RTC>, + <&cgu JZ4770_CLK_EXT>, + <&cgu JZ4770_CLK_PCLK>; clock-names = "rtc", "ext", "pclk"; interrupt-controller; @@ -65,6 +78,47 @@ interrupt-parent = <&intc>; interrupts = <27 26 25>; + + watchdog: watchdog@0 { + compatible = "ingenic,jz4770-watchdog", + "ingenic,jz4740-watchdog"; + reg = <0x0 0xc>; + + clocks = <&tcu TCU_CLK_WDT>; + clock-names = "wdt"; + }; + + pwm: pwm@40 { + compatible = "ingenic,jz4770-pwm", "ingenic,jz4740-pwm"; + reg = <0x40 0x80>; + 
+ #pwm-cells = <3>; + + clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>, + <&tcu TCU_CLK_TIMER2>, <&tcu TCU_CLK_TIMER3>, + <&tcu TCU_CLK_TIMER4>, <&tcu TCU_CLK_TIMER5>, + <&tcu TCU_CLK_TIMER6>, <&tcu TCU_CLK_TIMER7>; + clock-names = "timer0", "timer1", "timer2", "timer3", + "timer4", "timer5", "timer6", "timer7"; + }; + + ost: timer@e0 { + compatible = "ingenic,jz4770-ost"; + reg = <0xe0 0x20>; + + clocks = <&tcu TCU_CLK_OST>; + clock-names = "ost"; + + interrupts = <15>; + }; + }; + + rtc: rtc@10003000 { + compatible = "ingenic,jz4770-rtc", "ingenic,jz4760-rtc"; + reg = <0x10003000 0x40>; + + interrupt-parent = <&intc>; + interrupts = <32>; }; pinctrl: pin-controller@10010000 { @@ -165,6 +219,93 @@ }; }; + aic: audio-controller@10020000 { + compatible = "ingenic,jz4770-i2s"; + reg = <0x10020000 0x94>; + + #sound-dai-cells = <0>; + + clocks = <&cgu JZ4770_CLK_AIC>, <&cgu JZ4770_CLK_I2S>, + <&cgu JZ4770_CLK_EXT>, <&cgu JZ4770_CLK_PLL0>; + clock-names = "aic", "i2s", "ext", "pll half"; + + interrupt-parent = <&intc>; + interrupts = <34>; + + dmas = <&dmac0 25 0xffffffff>, <&dmac0 24 0xffffffff>; + dma-names = "rx", "tx"; + }; + + codec: audio-codec@100200a0 { + compatible = "ingenic,jz4770-codec"; + reg = <0x100200a4 0x8>; + + #sound-dai-cells = <0>; + + clocks = <&cgu JZ4770_CLK_AIC>; + clock-names = "aic"; + }; + + mmc0: mmc@10021000 { + compatible = "ingenic,jz4770-mmc", "ingenic,jz4760-mmc"; + reg = <0x10021000 0x1000>; + + clocks = <&cgu JZ4770_CLK_MMC0>; + clock-names = "mmc"; + + interrupt-parent = <&intc>; + interrupts = <37>; + + dmas = <&dmac1 27 0xffffffff>, <&dmac1 26 0xffffffff>; + dma-names = "rx", "tx"; + + cap-sd-highspeed; + cap-mmc-highspeed; + cap-sdio-irq; + + status = "disabled"; + }; + + mmc1: mmc@10022000 { + compatible = "ingenic,jz4770-mmc", "ingenic,jz4760-mmc"; + reg = <0x10022000 0x1000>; + + clocks = <&cgu JZ4770_CLK_MMC1>; + clock-names = "mmc"; + + interrupt-parent = <&intc>; + interrupts = <36>; + + dmas = <&dmac1 31 0xffffffff>, <&dmac1 30 0xffffffff>; + dma-names = "rx", "tx"; + + cap-sd-highspeed; + cap-mmc-highspeed; + cap-sdio-irq; + + status = "disabled"; + }; + + mmc2: mmc@10023000 { + compatible = "ingenic,jz4770-mmc", "ingenic,jz4760-mmc"; + reg = <0x10023000 0x1000>; + + clocks = <&cgu JZ4770_CLK_MMC2>; + clock-names = "mmc"; + + interrupt-parent = <&intc>; + interrupts = <35>; + + dmas = <&dmac1 37 0xffffffff>, <&dmac1 36 0xffffffff>; + dma-names = "rx", "tx"; + + cap-sd-highspeed; + cap-mmc-highspeed; + cap-sdio-irq; + + status = "disabled"; + }; + uart0: serial@10030000 { compatible = "ingenic,jz4770-uart"; reg = <0x10030000 0x100>; @@ -217,34 +358,63 @@ status = "disabled"; }; + adc: adc@10070000 { + compatible = "ingenic,jz4770-adc"; + reg = <0x10070000 0x30>; + + #io-channel-cells = <1>; + + clocks = <&cgu JZ4770_CLK_ADC>; + clock-names = "adc"; + + interrupt-parent = <&intc>; + interrupts = <18>; + }; + + gpu: gpu@13040000 { + compatible = "vivante,gc"; + reg = <0x13040000 0x10000>; + + clocks = <&cgu JZ4770_CLK_GPU>, + <&cgu JZ4770_CLK_GPU>, + <&cgu JZ4770_CLK_GPU>; + clock-names = "bus", "core", "shader"; + + interrupt-parent = <&intc>; + interrupts = <6>; + }; + + lcd: lcd-controller@13050000 { + compatible = "ingenic,jz4770-lcd"; + reg = <0x13050000 0x300>; + + interrupt-parent = <&intc>; + interrupts = <31>; + + clocks = <&cgu JZ4770_CLK_LPCLK_MUX>; + clock-names = "lcd_pclk"; + }; + dmac0: dma-controller@13420000 { compatible = "ingenic,jz4770-dma"; - reg = <0x13420000 0xC0 - 0x13420300 0x20>; + reg = <0x13420000 0xC0>, <0x13420300 
0x20>; - #dma-cells = <1>; + #dma-cells = <2>; clocks = <&cgu JZ4770_CLK_DMA>; interrupt-parent = <&intc>; interrupts = <24>; - - /* Disable dmac0 until we have something that uses it */ - status = "disabled"; }; dmac1: dma-controller@13420100 { compatible = "ingenic,jz4770-dma"; - reg = <0x13420100 0xC0 - 0x13420400 0x20>; + reg = <0x13420100 0xC0>, <0x13420400 0x20>; - #dma-cells = <1>; + #dma-cells = <2>; clocks = <&cgu JZ4770_CLK_DMA>; interrupt-parent = <&intc>; interrupts = <23>; - - /* Disable dmac1 until we have something that uses it */ - status = "disabled"; }; uhc: uhc@13430000 { @@ -260,4 +430,29 @@ status = "disabled"; }; + + usb_otg: usb@13440000 { + compatible = "ingenic,jz4770-musb"; + reg = <0x13440000 0x10000>; + + clocks = <&cgu JZ4770_CLK_OTG>; + clock-names = "udc"; + + interrupt-parent = <&intc>; + interrupts = <21>; + interrupt-names = "mc"; + + phys = <&otg_phy>; + + usb-role-switch; + }; + + rom: memory@1fc00000 { + compatible = "mtd-rom"; + probe-type = "map_rom"; + reg = <0x1fc00000 0x2000>; + + bank-width = <4>; + device-width = <1>; + }; }; diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index bb89653d16a3..b7f409a7cf5d 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -58,9 +58,9 @@ #clock-cells = <1>; - clocks = <&cgu JZ4780_CLK_RTCLK - &cgu JZ4780_CLK_EXCLK - &cgu JZ4780_CLK_PCLK>; + clocks = <&cgu JZ4780_CLK_RTCLK>, + <&cgu JZ4780_CLK_EXCLK>, + <&cgu JZ4780_CLK_PCLK>; clock-names = "rtc", "ext", "pclk"; interrupt-controller; @@ -76,6 +76,30 @@ clocks = <&tcu TCU_CLK_WDT>; clock-names = "wdt"; }; + + pwm: pwm@40 { + compatible = "ingenic,jz4780-pwm", "ingenic,jz4740-pwm"; + reg = <0x40 0x80>; + + #pwm-cells = <3>; + + clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>, + <&tcu TCU_CLK_TIMER2>, <&tcu TCU_CLK_TIMER3>, + <&tcu TCU_CLK_TIMER4>, <&tcu TCU_CLK_TIMER5>, + <&tcu TCU_CLK_TIMER6>, <&tcu TCU_CLK_TIMER7>; + clock-names = "timer0", "timer1", "timer2", "timer3", + "timer4", "timer5", "timer6", "timer7"; + }; + + ost: timer@e0 { + compatible = "ingenic,jz4780-ost", "ingenic,jz4770-ost"; + reg = <0xe0 0x20>; + + clocks = <&tcu TCU_CLK_OST>; + clock-names = "ost"; + + interrupts = <15>; + }; }; rtc_dev: rtc@10003000 { @@ -196,8 +220,7 @@ gpio-miso = <&gpe 14 0>; gpio-sck = <&gpe 15 0>; gpio-mosi = <&gpe 17 0>; - cs-gpios = <&gpe 16 0 - &gpe 18 0>; + cs-gpios = <&gpe 16 0>, <&gpe 18 0>; spidev@0 { compatible = "spidev"; @@ -358,26 +381,40 @@ }; nemc: nemc@13410000 { - compatible = "ingenic,jz4780-nemc"; + compatible = "ingenic,jz4780-nemc", "simple-mfd"; reg = <0x13410000 0x10000>; #address-cells = <2>; #size-cells = <1>; - ranges = <1 0 0x1b000000 0x1000000 - 2 0 0x1a000000 0x1000000 - 3 0 0x19000000 0x1000000 - 4 0 0x18000000 0x1000000 - 5 0 0x17000000 0x1000000 - 6 0 0x16000000 0x1000000>; + ranges = <0 0 0x13410000 0x10000>, + <1 0 0x1b000000 0x1000000>, + <2 0 0x1a000000 0x1000000>, + <3 0 0x19000000 0x1000000>, + <4 0 0x18000000 0x1000000>, + <5 0 0x17000000 0x1000000>, + <6 0 0x16000000 0x1000000>; clocks = <&cgu JZ4780_CLK_NEMC>; status = "disabled"; + + efuse: efuse@d0 { + reg = <0 0xd0 0x30>; + compatible = "ingenic,jz4780-efuse"; + + clocks = <&cgu JZ4780_CLK_AHB2>; + + #address-cells = <1>; + #size-cells = <1>; + + eth0_addr: eth-mac-addr@0x22 { + reg = <0x22 0x6>; + }; + }; }; dma: dma@13420000 { compatible = "ingenic,jz4780-dma"; - reg = <0x13420000 0x400 - 0x13421000 0x40>; + reg = <0x13420000 0x400>, <0x13421000 0x40>; #dma-cells = <2>; 
interrupt-parent = <&intc>; diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi index 147f7d5c243a..59a63a0985a8 100644 --- a/arch/mips/boot/dts/ingenic/x1000.dtsi +++ b/arch/mips/boot/dts/ingenic/x1000.dtsi @@ -58,9 +58,9 @@ #clock-cells = <1>; - clocks = <&cgu X1000_CLK_RTCLK - &cgu X1000_CLK_EXCLK - &cgu X1000_CLK_PCLK>; + clocks = <&cgu X1000_CLK_RTCLK>, + <&cgu X1000_CLK_EXCLK>, + <&cgu X1000_CLK_PCLK>; clock-names = "rtc", "ext", "pclk"; interrupt-controller; @@ -239,8 +239,7 @@ pdma: dma-controller@13420000 { compatible = "ingenic,x1000-dma"; - reg = <0x13420000 0x400 - 0x13421000 0x40>; + reg = <0x13420000 0x400>, <0x13421000 0x40>; #dma-cells = <2>; interrupt-parent = <&intc>; diff --git a/arch/mips/boot/dts/loongson/rs780e-pch.dtsi b/arch/mips/boot/dts/loongson/rs780e-pch.dtsi index 45c54d555fa4..d0d5d60a8697 100644 --- a/arch/mips/boot/dts/loongson/rs780e-pch.dtsi +++ b/arch/mips/boot/dts/loongson/rs780e-pch.dtsi @@ -9,6 +9,18 @@ 0 0x40000000 0 0x40000000 0 0x40000000 0xfd 0xfe000000 0xfd 0xfe000000 0 0x2000000 /* PCI Config Space */>; + pci@1a000000 { + compatible = "loongson,rs780e-pci"; + device_type = "pci"; + #address-cells = <3>; + #size-cells = <2>; + + reg = <0 0x1a000000 0 0x02000000>; + + ranges = <0x01000000 0 0x00004000 0 0x18004000 0 0x00004000>, + <0x02000000 0 0x40000000 0 0x40000000 0 0x40000000>; + }; + isa { compatible = "isa"; #address-cells = <2>; @@ -21,6 +33,11 @@ interrupts = <8>; interrupt-parent = <&htpic>; }; + + acpi@800 { + compatible = "loongson,rs780e-acpi"; + reg = <1 0x800 0x100>; + }; }; }; }; diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi index 797d336db54d..f94e8a02ed06 100644 --- a/arch/mips/boot/dts/mscc/ocelot.dtsi +++ b/arch/mips/boot/dts/mscc/ocelot.dtsi @@ -214,7 +214,7 @@ miim1: miim1 { pins = "GPIO_14", "GPIO_15"; - function = "miim1"; + function = "miim"; }; }; diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi index 8f5aed760abb..83b3c0ce135a 100644 --- a/arch/mips/boot/dts/qca/ar9331.dtsi +++ b/arch/mips/boot/dts/qca/ar9331.dtsi @@ -59,7 +59,7 @@ #qca,ddr-wb-channel-cells = <1>; }; - uart: uart@18020000 { + uart: serial@18020000 { compatible = "qca,ar9330-uart"; reg = <0x18020000 0x14>; diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts index 0f2b20044834..7695d326df11 100644 --- a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts +++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts @@ -3,6 +3,7 @@ #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/input/input.h> +#include <dt-bindings/leds/common.h> #include "ar9331.dtsi" @@ -22,8 +23,9 @@ leds { compatible = "gpio-leds"; - system { - label = "dpt-module:green:system"; + led-0 { + function = LED_FUNCTION_STATUS; + color = <LED_COLOR_ID_GREEN>; gpios = <&gpio 27 GPIO_ACTIVE_LOW>; default-state = "off"; }; diff --git a/arch/mips/cavium-octeon/Platform b/arch/mips/cavium-octeon/Platform index 45be853700e6..4adef38dea9d 100644 --- a/arch/mips/cavium-octeon/Platform +++ b/arch/mips/cavium-octeon/Platform @@ -1,7 +1,6 @@ # # Cavium Octeon # -platform-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon/ cflags-$(CONFIG_CAVIUM_OCTEON_SOC) += \ -I$(srctree)/arch/mips/include/asm/mach-cavium-octeon load-$(CONFIG_CAVIUM_OCTEON_SOC) += 0xffffffff81100000 diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-npi.c b/arch/mips/cavium-octeon/executive/cvmx-helper-npi.c index cc94cfa545b4..cb210d2ef0c4 100644 --- 
a/arch/mips/cavium-octeon/executive/cvmx-helper-npi.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper-npi.c @@ -59,18 +59,6 @@ int __cvmx_helper_npi_probe(int interface) && !OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X)) /* The packet engines didn't exist before pass 2 */ return 4; -#if 0 - /* - * Technically CN30XX, CN31XX, and CN50XX contain packet - * engines, but nobody ever uses them. Since this is the case, - * we disable them here. - */ - else if (OCTEON_IS_MODEL(OCTEON_CN31XX) - || OCTEON_IS_MODEL(OCTEON_CN50XX)) - return 2; - else if (OCTEON_IS_MODEL(OCTEON_CN30XX)) - return 1; -#endif #endif return 0; } diff --git a/arch/mips/cavium-octeon/executive/cvmx-pko.c b/arch/mips/cavium-octeon/executive/cvmx-pko.c index b077597c668a..b0efc35e95c4 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-pko.c +++ b/arch/mips/cavium-octeon/executive/cvmx-pko.c @@ -489,7 +489,7 @@ cvmx_pko_status_t cvmx_pko_config_port(uint64_t port, uint64_t base_queue, config.s.qos_mask = 0xff; break; } - /* fall through - to the error case, when Pass 1 */ + fallthrough; /* to the error case, when Pass 1 */ default: cvmx_dprintf("ERROR: cvmx_pko_config_port: Invalid " "priority %llu\n", diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index 51685f893eab..d56e9b9d2e43 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -141,7 +141,7 @@ static void octeon2_usb_clocks_start(struct device *dev) default: pr_err("Invalid UCTL clock rate of %u, using 12000000 instead\n", clock_rate); - /* Fall through */ + fallthrough; case 12000000: clk_rst_ctl.s.p_refclk_div = 0; break; @@ -1116,7 +1116,7 @@ end_led: new_f[0] = cpu_to_be32(48000000); fdt_setprop_inplace(initial_boot_params, usbn, "refclk-frequency", new_f, sizeof(new_f)); - /* Fall through ...*/ + fallthrough; case USB_CLOCK_TYPE_REF_12: /* Missing "refclk-type" defaults to external. 
*/ fdt_nop_property(initial_boot_params, usbn, "refclk-type"); diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c index cc88a08bc1f7..1fd85c559700 100644 --- a/arch/mips/cavium-octeon/octeon-usb.c +++ b/arch/mips/cavium-octeon/octeon-usb.c @@ -398,7 +398,7 @@ static int dwc3_octeon_clocks_start(struct device *dev, u64 base) default: dev_err(dev, "Invalid ref_clk %u, using 100000000 instead\n", clock_rate); - /* fall through */ + fallthrough; case 100000000: mpll_mul = 0x19; if (ref_clk_sel < 2) diff --git a/arch/mips/cobalt/Platform b/arch/mips/cobalt/Platform index 34123efd6dfe..4254895ad6f4 100644 --- a/arch/mips/cobalt/Platform +++ b/arch/mips/cobalt/Platform @@ -1,6 +1,5 @@ # # Cobalt Server # -platform-$(CONFIG_MIPS_COBALT) += cobalt/ cflags-$(CONFIG_MIPS_COBALT) += -I$(srctree)/arch/mips/include/asm/mach-cobalt load-$(CONFIG_MIPS_COBALT) += 0xffffffff80080000 diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig index 3d14d67dc746..96622a2ad333 100644 --- a/arch/mips/configs/ath79_defconfig +++ b/arch/mips/configs/ath79_defconfig @@ -46,7 +46,6 @@ CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y CONFIG_NETDEVICES=y CONFIG_ATH9K=m diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig index 0db0088bbc1c..e511fe0243a5 100644 --- a/arch/mips/configs/ci20_defconfig +++ b/arch/mips/configs/ci20_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_MODULES=y CONFIG_KERNEL_XZ=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -29,6 +28,7 @@ CONFIG_HIGHMEM=y CONFIG_HZ_100=y # CONFIG_SECCOMP is not set # CONFIG_SUSPEND is not set +CONFIG_MODULES=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPACTION is not set CONFIG_CMA=y @@ -38,17 +38,12 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y # CONFIG_FW_LOADER is not set # CONFIG_ALLOW_DEV_COREDUMP is not set -CONFIG_DMA_CMA=y -CONFIG_CMA_SIZE_MBYTES=32 CONFIG_MTD=y CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_JZ4780=y @@ -72,9 +67,8 @@ CONFIG_DM9000_FORCE_SIMPLE_PHY_POLL=y # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_WLAN is not set -# CONFIG_INPUT_KEYBOARD is not set +CONFIG_KEYBOARD_GPIO=m # CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_LEGACY_PTY_COUNT=2 CONFIG_SERIAL_8250=y @@ -89,7 +83,7 @@ CONFIG_I2C_JZ4780=y CONFIG_SPI=y CONFIG_SPI_GPIO=y CONFIG_GPIO_SYSFS=y -CONFIG_KEYBOARD_GPIO=m +CONFIG_POWER_SUPPLY=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y CONFIG_JZ4740_WDT=y @@ -97,17 +91,45 @@ CONFIG_REGULATOR=y CONFIG_REGULATOR_DEBUG=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_ACT8865=y +CONFIG_RC_CORE=m +CONFIG_LIRC=y +CONFIG_RC_DEVICES=y +CONFIG_IR_GPIO_CIR=m +CONFIG_IR_GPIO_TX=m +CONFIG_MEDIA_SUPPORT=m # CONFIG_VGA_CONSOLE is not set # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_MMC=y CONFIG_MMC_JZ4740=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=y +CONFIG_LEDS_TRIGGER_ONESHOT=y +CONFIG_LEDS_TRIGGER_MTD=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_LEDS_TRIGGER_BACKLIGHT=m +CONFIG_LEDS_TRIGGER_CPU=y 
+CONFIG_LEDS_TRIGGER_ACTIVITY=y +CONFIG_LEDS_TRIGGER_GPIO=y +CONFIG_LEDS_TRIGGER_DEFAULT_ON=y +CONFIG_LEDS_TRIGGER_TRANSIENT=y +CONFIG_LEDS_TRIGGER_CAMERA=m +CONFIG_LEDS_TRIGGER_PANIC=y +CONFIG_LEDS_TRIGGER_NETDEV=y +CONFIG_LEDS_TRIGGER_PATTERN=y +CONFIG_LEDS_TRIGGER_AUDIO=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_JZ4740=y CONFIG_DMADEVICES=y CONFIG_DMA_JZ4780=y +CONFIG_INGENIC_OST=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_MEMORY=y +CONFIG_PWM=y +CONFIG_PWM_JZ4740=m CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_PROC_KCORE=y @@ -156,11 +178,13 @@ CONFIG_NLS_ISO8859_15=y CONFIG_NLS_KOI8_R=y CONFIG_NLS_KOI8_U=y CONFIG_NLS_UTF8=y +CONFIG_DMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=32 CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_STRIP_ASM_SYMS=y -CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y CONFIG_PANIC_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set @@ -169,21 +193,3 @@ CONFIG_STACKTRACE=y # CONFIG_FTRACE is not set CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon console=ttyS4,115200 clk_ignore_unused" -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_MTD=y -CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_ONESHOT=y -CONFIG_LEDS_TRIGGER_ONESHOT=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_LEDS_TRIGGER_BACKLIGHT=m -CONFIG_LEDS_TRIGGER_CPU=y -CONFIG_LEDS_TRIGGER_DEFAULT_ON=y -CONFIG_LEDS_TRIGGER_TRANSIENT=y -CONFIG_LEDS_TRIGGER_CAMERA=m -CONFIG_LIRC=y -CONFIG_MEDIA_SUPPORT=m -CONFIG_RC_DEVICES=y -CONFIG_IR_GPIO_CIR=m -CONFIG_IR_GPIO_TX=m diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig index e6f3e8e3da39..b8bd66300996 100644 --- a/arch/mips/configs/db1xxx_defconfig +++ b/arch/mips/configs/db1xxx_defconfig @@ -92,7 +92,6 @@ CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_M25P80=y CONFIG_MTD_SST25L=y CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_ECC_SW_BCH=y diff --git a/arch/mips/configs/gcw0_defconfig b/arch/mips/configs/gcw0_defconfig index a3e3eb3c5a8b..48131cb47e66 100644 --- a/arch/mips/configs/gcw0_defconfig +++ b/arch/mips/configs/gcw0_defconfig @@ -1,27 +1,152 @@ +CONFIG_DEFAULT_HOSTNAME="gcw0" CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_EMBEDDED=y +CONFIG_PROFILING=y CONFIG_MACH_INGENIC=y CONFIG_JZ4770_GCW0=y CONFIG_HIGHMEM=y # CONFIG_SECCOMP is not set -# CONFIG_SUSPEND is not set +CONFIG_MIPS_RAW_APPENDED_DTB=y +CONFIG_MIPS_CMDLINE_DTB_EXTEND=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_BOUNCE is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y +CONFIG_CFG80211=y +CONFIG_MAC80211=m CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_LOOP_MIN_COUNT=0 CONFIG_NETDEVICES=y +# CONFIG_ETHERNET is not set +# CONFIG_WLAN_VENDOR_ADMTEK is not set +# CONFIG_WLAN_VENDOR_ATH is not set +# CONFIG_WLAN_VENDOR_ATMEL is not set +# CONFIG_WLAN_VENDOR_BROADCOM is not set +# CONFIG_WLAN_VENDOR_CISCO is not set +# CONFIG_WLAN_VENDOR_INTEL is not set +# CONFIG_WLAN_VENDOR_INTERSIL is not set +# CONFIG_WLAN_VENDOR_MARVELL is not set +# CONFIG_WLAN_VENDOR_MEDIATEK is not set +# CONFIG_WLAN_VENDOR_RALINK is not set +CONFIG_RTL8192CU=m +# CONFIG_RTLWIFI_DEBUG is not set +# CONFIG_WLAN_VENDOR_RSI is not set +# CONFIG_WLAN_VENDOR_ST is not set +# CONFIG_WLAN_VENDOR_TI is not set +# CONFIG_WLAN_VENDOR_ZYDAS is not set +# CONFIG_WLAN_VENDOR_QUANTENNA is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GPIO=y +# 
CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_PWM_VIBRA=y +# CONFIG_SERIO is not set CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_INGENIC=y +CONFIG_HW_RANDOM=y +CONFIG_I2C_GPIO=y +CONFIG_SPI=y +CONFIG_SPI_GPIO=y +CONFIG_POWER_SUPPLY=y +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_JZ4740_WDT=y +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_REGULATOR_ACT8865=y +CONFIG_DRM=y +CONFIG_DRM_FBDEV_OVERALLOC=300 +CONFIG_DRM_PANEL_NOVATEK_NT39016=y +CONFIG_DRM_INGENIC=y +CONFIG_DRM_ETNAVIV=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_GENERIC is not set +CONFIG_BACKLIGHT_PWM=y +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +# CONFIG_SND_SUPPORT_OLD_API is not set +# CONFIG_SND_PROC_FS is not set +# CONFIG_SND_DRIVERS is not set +# CONFIG_SND_SPI is not set +# CONFIG_SND_MIPS is not set +# CONFIG_SND_USB is not set +CONFIG_SND_SOC=y +CONFIG_SND_JZ4740_SOC_I2S=y +CONFIG_SND_SOC_JZ4770_CODEC=y +CONFIG_SND_SOC_SIMPLE_AMPLIFIER=y +CONFIG_SND_SIMPLE_CARD=y +CONFIG_USB_CONN_GPIO=y CONFIG_USB=y +CONFIG_USB_OTG=y +CONFIG_USB_OTG_BLACKLIST_HUB=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y -CONFIG_NOP_USB_XCEIV=y -CONFIG_TMPFS=y +CONFIG_USB_MUSB_HDRC=y +CONFIG_USB_MUSB_GADGET=y +CONFIG_USB_MUSB_JZ4740=y +CONFIG_USB_INVENTRA_DMA=y +CONFIG_JZ4770_PHY=y +CONFIG_USB_GADGET=y +CONFIG_USB_GADGET_VBUS_DRAW=500 +CONFIG_USB_ETH=y +CONFIG_MMC=y +# CONFIG_PWRSEQ_EMMC is not set +# CONFIG_PWRSEQ_SIMPLE is not set +CONFIG_MMC_JZ4740=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=y +CONFIG_LEDS_TRIGGER_DEFAULT_ON=y +CONFIG_LEDS_TRIGGER_PANIC=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_NVMEM is not set +CONFIG_RTC_DRV_JZ4740=y +CONFIG_DMADEVICES=y +CONFIG_DMA_JZ4780=y +# CONFIG_VIRTIO_MENU is not set +CONFIG_STAGING=y +CONFIG_R8188EU=m +CONFIG_INGENIC_OST=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_IIO=y +CONFIG_IIO_BUFFER=y +CONFIG_IIO_BUFFER_CB=y +CONFIG_IIO_KFIFO_BUF=y +CONFIG_MXC6255=m +CONFIG_INGENIC_ADC=y +CONFIG_PWM=y +CONFIG_PWM_JZ4740=y +CONFIG_EXT4_FS=y +CONFIG_VFAT_FS=y +CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_FILE_DIRECT=y +CONFIG_SQUASHFS_DECOMP_MULTI=y +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity" +CONFIG_CRYPTO_ECHAINIV=y +CONFIG_FONTS=y +CONFIG_FONT_6x10=y +CONFIG_DEBUG_FS=y diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config index 7626f2a75b03..510709565404 100644 --- a/arch/mips/configs/generic/board-ocelot.config +++ b/arch/mips/configs/generic/board-ocelot.config @@ -9,7 +9,6 @@ CONFIG_GENERIC_PHY=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y -CONFIG_MTD_M25P80=y CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_PLATFORM=y CONFIG_MTD_SPI_NOR=y diff --git a/arch/mips/configs/lasat_defconfig b/arch/mips/configs/lasat_defconfig deleted file mode 100644 index 00cf461db971..000000000000 --- a/arch/mips/configs/lasat_defconfig +++ /dev/null @@ -1,55 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EXPERT=y -# CONFIG_EPOLL is not set -# CONFIG_SIGNALFD is not set -# CONFIG_TIMERFD is not set -# CONFIG_EVENTFD is not set -# CONFIG_KALLSYMS is not set 
-CONFIG_SLAB=y -CONFIG_LASAT=y -CONFIG_PICVUE=y -CONFIG_PICVUE_PROC=y -CONFIG_DS1603=y -CONFIG_LASAT_SYSCTL=y -CONFIG_HZ_1000=y -# CONFIG_SECCOMP is not set -CONFIG_PCI=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_DIAG is not set -# CONFIG_IPV6 is not set -CONFIG_MTD=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_BLK_DEV_SD=y -CONFIG_ATA=y -CONFIG_PATA_CMD64X=y -CONFIG_ATA_GENERIC=y -CONFIG_PATA_LEGACY=y -CONFIG_NETDEVICES=y -CONFIG_PCNET32=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -CONFIG_SERIO_RAW=y -# CONFIG_VT is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_SERIAL_8250_PCI is not set -# CONFIG_HW_RANDOM is not set -# CONFIG_HWMON is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_DNOTIFY is not set -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_CONFIGFS_FS=y -CONFIG_MAGIC_SYSRQ=y diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 51675f5000d6..3d4c7e9996c5 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -21,6 +21,7 @@ CONFIG_SYSFS_DEPRECATED=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y CONFIG_MACH_LOONGSON64=y CONFIG_SMP=y CONFIG_HZ_256=y @@ -216,6 +217,7 @@ CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_HW_RANDOM=y CONFIG_RAW_DRIVER=m CONFIG_I2C_CHARDEV=y @@ -229,7 +231,7 @@ CONFIG_MEDIA_CAMERA_SUPPORT=y CONFIG_MEDIA_USB_SUPPORT=y CONFIG_USB_VIDEO_CLASS=m CONFIG_DRM=y -CONFIG_DRM_RADEON=y +CONFIG_DRM_RADEON=m CONFIG_FB_RADEON=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=m diff --git a/arch/mips/configs/markeins_defconfig b/arch/mips/configs/markeins_defconfig deleted file mode 100644 index 507ad91b21e7..000000000000 --- a/arch/mips/configs/markeins_defconfig +++ /dev/null @@ -1,185 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_PREEMPT=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EXPERT=y -CONFIG_SLAB=y -CONFIG_NEC_MARKEINS=y -CONFIG_HZ_1000=y -CONFIG_PCI=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_NET_KEY=y -CONFIG_NET_KEY_MIGRATE=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_SYN_COOKIES=y -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m -CONFIG_TCP_MD5SIG=y -CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_NETWORK_SECMARK=y -CONFIG_NETFILTER=y -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m 
-CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_FW_LOADER=m -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_PHYSMAP=y -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_SCSI=m -# CONFIG_SCSI_PROC_FS is not set -CONFIG_BLK_DEV_SD=m -CONFIG_CHR_DEV_SG=m -CONFIG_SCSI_SCAN_ASYNC=y -CONFIG_SCSI_AIC94XX=m -# CONFIG_AIC94XX_DEBUG is not set -CONFIG_NETDEVICES=y -CONFIG_TUN=m -CONFIG_CHELSIO_T3=m -CONFIG_NATSEMI=y -CONFIG_QLA3XXX=m -CONFIG_NETXEN_NIC=m -CONFIG_PPP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_INPUT_EVDEV=m -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_HW_RANDOM is not set -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_DEBUG_CORE=y -CONFIG_I2C_DEBUG_BUS=y -# CONFIG_HID is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=m -CONFIG_XFS_FS=m -# CONFIG_DNOTIFY is not set -CONFIG_AUTOFS4_FS=m -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_NTFS_FS=m -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_JFFS2_FS=y -CONFIG_JFFS2_COMPRESSION_OPTIONS=y -CONFIG_CRAMFS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V4=y -CONFIG_ROOT_NFS=y -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NLS_DEFAULT="" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_UTF8=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0,115200 mem=192m ip=bootp root=/dev/nfs rw" diff --git a/arch/mips/configs/msp71xx_defconfig 
b/arch/mips/configs/msp71xx_defconfig deleted file mode 100644 index 6ad1a2381226..000000000000 --- a/arch/mips/configs/msp71xx_defconfig +++ /dev/null @@ -1,77 +0,0 @@ -CONFIG_LOCALVERSION="-pmc" -# CONFIG_SWAP is not set -CONFIG_SYSVIPC=y -CONFIG_PREEMPT=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EXPERT=y -# CONFIG_SHMEM is not set -CONFIG_SLAB=y -CONFIG_PMC_MSP=y -CONFIG_PMC_MSP7120_GW=y -CONFIG_CPU_MIPS32_R2=y -CONFIG_NR_CPUS=2 -CONFIG_PCI=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_NET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=y -CONFIG_NET_KEY=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_INET_AH=y -CONFIG_INET_ESP=y -CONFIG_INET_IPCOMP=y -# CONFIG_IPV6 is not set -CONFIG_NETFILTER=y -CONFIG_IP_NF_IPTABLES=y -CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_BRIDGE=y -# CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FW_LOADER is not set -CONFIG_MTD=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_RAM=y -CONFIG_MTD_PMC_MSP_EVM=y -CONFIG_BLK_DEV_RAM=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_NETDEVICES=y -CONFIG_DUMMY=y -CONFIG_PPP=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -# CONFIG_LEGACY_PTYS is not set -# CONFIG_SERIAL_8250_PCI is not set -CONFIG_SERIAL_8250_NR_UARTS=2 -CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -# CONFIG_HW_RANDOM is not set -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_PMCMSP=y -# CONFIG_USB_HID is not set -CONFIG_USB=y -CONFIG_USB_MON=y -CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_ROOT_HUB_TT=y -# CONFIG_USB_EHCI_TT_NEWSCHED is not set -CONFIG_USB_STORAGE=y -CONFIG_EXT2_FS=y -# CONFIG_DNOTIFY is not set -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_JFFS2_FS=y -CONFIG_SQUASHFS=y -CONFIG_SQUASHFS_EMBEDDED=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ISO8859_1=y -CONFIG_MAGIC_SYSRQ=y diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig index 24e07180c57d..b9adf15ebbec 100644 --- a/arch/mips/configs/pistachio_defconfig +++ b/arch/mips/configs/pistachio_defconfig @@ -127,7 +127,6 @@ CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_BLOCK=y -CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y CONFIG_MTD_UBI=y CONFIG_MTD_UBI_BLOCK=y diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig index 8c2ead53007a..fec5851c164b 100644 --- a/arch/mips/configs/rt305x_defconfig +++ b/arch/mips/configs/rt305x_defconfig @@ -76,7 +76,6 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y CONFIG_EEPROM_93CX6=m CONFIG_SCSI=y diff --git a/arch/mips/dec/Platform b/arch/mips/dec/Platform index cf55a6f4e720..c82391e832f9 100644 --- a/arch/mips/dec/Platform +++ b/arch/mips/dec/Platform @@ -1,7 +1,6 @@ # # DECstation family # -platform-$(CONFIG_MACH_DECSTATION) += dec/ cflags-$(CONFIG_MACH_DECSTATION) += \ -I$(srctree)/arch/mips/include/asm/mach-dec libs-$(CONFIG_MACH_DECSTATION) += arch/mips/dec/prom/ diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index a25ef822e725..ea5b5a83f1e1 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -304,8 +304,8 @@ spurious: */ FEXPORT(dec_intr_unimplemented) move a1,t0 # cheats way of printing an arg! - PANIC("Unimplemented cpu interrupt! CP0_CAUSE: 0x%08x"); + ASM_PANIC("Unimplemented cpu interrupt! 
CP0_CAUSE: 0x%08x"); FEXPORT(asic_intr_unimplemented) move a1,t0 # cheats way of printing an arg! - PANIC("Unimplemented asic interrupt! ASIC ISR: 0x%08x"); + ASM_PANIC("Unimplemented asic interrupt! ASIC ISR: 0x%08x"); diff --git a/arch/mips/dec/tc.c b/arch/mips/dec/tc.c index 732027c79834..dba58397668e 100644 --- a/arch/mips/dec/tc.c +++ b/arch/mips/dec/tc.c @@ -52,7 +52,7 @@ int __init tc_bus_get_info(struct tc_bus *tbus) case MACH_DS5900: tbus->ext_slot_base = 0x20000000; tbus->ext_slot_size = 0x20000000; - /* fall through */ + fallthrough; case MACH_DS5000_1XX: tbus->num_tcslots = 3; break; diff --git a/arch/mips/emma/Makefile b/arch/mips/emma/Makefile deleted file mode 100644 index bc03082064ca..000000000000 --- a/arch/mips/emma/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_SOC_EMMA2RH) += common/ - -# -# NEC EMMA2RH Mark-eins -# -obj-$(CONFIG_NEC_MARKEINS) += markeins/ diff --git a/arch/mips/emma/Platform b/arch/mips/emma/Platform deleted file mode 100644 index 0282f7f99b88..000000000000 --- a/arch/mips/emma/Platform +++ /dev/null @@ -1,4 +0,0 @@ -platform-$(CONFIG_SOC_EMMA2RH) += emma/ -cflags-$(CONFIG_SOC_EMMA2RH) += \ - -I$(srctree)/arch/mips/include/asm/mach-emma2rh -load-$(CONFIG_NEC_MARKEINS) += 0xffffffff88100000 diff --git a/arch/mips/emma/common/Makefile b/arch/mips/emma/common/Makefile deleted file mode 100644 index a754abd1beb9..000000000000 --- a/arch/mips/emma/common/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-or-later -# -# Copyright (C) NEC Electronics Corporation 2005-2006 -# - -obj-$(CONFIG_NEC_MARKEINS) += prom.o diff --git a/arch/mips/emma/common/prom.c b/arch/mips/emma/common/prom.c deleted file mode 100644 index 7c3a6f32beda..000000000000 --- a/arch/mips/emma/common/prom.c +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - * - * This file is based on the arch/mips/ddb5xxx/common/prom.c - * - * Copyright 2001 MontaVista Software Inc. 
- */ -#include <linux/init.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/memblock.h> - -#include <asm/addrspace.h> -#include <asm/bootinfo.h> -#include <asm/emma/emma2rh.h> - -const char *get_system_type(void) -{ -#ifdef CONFIG_NEC_MARKEINS - return "NEC EMMA2RH Mark-eins"; -#else -#error Unknown NEC board -#endif -} - -/* [jsun@junsun.net] PMON passes arguments in C main() style */ -void __init prom_init(void) -{ - int argc = fw_arg0; - char **arg = (char **)fw_arg1; - int i; - - /* if user passes kernel args, ignore the default one */ - if (argc > 1) - arcs_cmdline[0] = '\0'; - - /* arg[0] is "g", the rest is boot parameters */ - for (i = 1; i < argc; i++) { - if (strlen(arcs_cmdline) + strlen(arg[i]) + 1 - >= sizeof(arcs_cmdline)) - break; - strcat(arcs_cmdline, arg[i]); - strcat(arcs_cmdline, " "); - } - -#ifdef CONFIG_NEC_MARKEINS - add_memory_region(0, EMMA2RH_RAM_SIZE, BOOT_MEM_RAM); -#else -#error Unknown NEC board -#endif -} - -void __init prom_free_prom_memory(void) -{ -} diff --git a/arch/mips/emma/markeins/Makefile b/arch/mips/emma/markeins/Makefile deleted file mode 100644 index 8c8649069504..000000000000 --- a/arch/mips/emma/markeins/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-or-later -# -# Copyright (C) NEC Electronics Corporation 2005-2006 -# - -obj-$(CONFIG_NEC_MARKEINS) += irq.o setup.o led.o platform.o diff --git a/arch/mips/emma/markeins/irq.c b/arch/mips/emma/markeins/irq.c deleted file mode 100644 index 4aebf559be2e..000000000000 --- a/arch/mips/emma/markeins/irq.c +++ /dev/null @@ -1,293 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - * - * This file is based on the arch/mips/ddb5xxx/ddb5477/irq.c - * - * Copyright 2001 MontaVista Software Inc. 
- */ -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/delay.h> - -#include <asm/irq_cpu.h> -#include <asm/mipsregs.h> -#include <asm/addrspace.h> -#include <asm/bootinfo.h> - -#include <asm/emma/emma2rh.h> - -static void emma2rh_irq_enable(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_IRQ_BASE; - u32 reg_value, reg_bitmask, reg_index; - - reg_index = EMMA2RH_BHIF_INT_EN_0 + - (EMMA2RH_BHIF_INT_EN_1 - EMMA2RH_BHIF_INT_EN_0) * (irq / 32); - reg_value = emma2rh_in32(reg_index); - reg_bitmask = 0x1 << (irq % 32); - emma2rh_out32(reg_index, reg_value | reg_bitmask); -} - -static void emma2rh_irq_disable(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_IRQ_BASE; - u32 reg_value, reg_bitmask, reg_index; - - reg_index = EMMA2RH_BHIF_INT_EN_0 + - (EMMA2RH_BHIF_INT_EN_1 - EMMA2RH_BHIF_INT_EN_0) * (irq / 32); - reg_value = emma2rh_in32(reg_index); - reg_bitmask = 0x1 << (irq % 32); - emma2rh_out32(reg_index, reg_value & ~reg_bitmask); -} - -struct irq_chip emma2rh_irq_controller = { - .name = "emma2rh_irq", - .irq_mask = emma2rh_irq_disable, - .irq_unmask = emma2rh_irq_enable, -}; - -void emma2rh_irq_init(void) -{ - u32 i; - - for (i = 0; i < NUM_EMMA2RH_IRQ; i++) - irq_set_chip_and_handler_name(EMMA2RH_IRQ_BASE + i, - &emma2rh_irq_controller, - handle_level_irq, "level"); -} - -static void emma2rh_sw_irq_enable(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_SW_IRQ_BASE; - u32 reg; - - reg = emma2rh_in32(EMMA2RH_BHIF_SW_INT_EN); - reg |= 1 << irq; - emma2rh_out32(EMMA2RH_BHIF_SW_INT_EN, reg); -} - -static void emma2rh_sw_irq_disable(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_SW_IRQ_BASE; - u32 reg; - - reg = emma2rh_in32(EMMA2RH_BHIF_SW_INT_EN); - reg &= ~(1 << irq); - emma2rh_out32(EMMA2RH_BHIF_SW_INT_EN, reg); -} - -struct irq_chip emma2rh_sw_irq_controller = { - .name = "emma2rh_sw_irq", - .irq_mask = emma2rh_sw_irq_disable, - .irq_unmask = emma2rh_sw_irq_enable, -}; - -void emma2rh_sw_irq_init(void) -{ - u32 i; - - for (i = 0; i < NUM_EMMA2RH_IRQ_SW; i++) - irq_set_chip_and_handler_name(EMMA2RH_SW_IRQ_BASE + i, - &emma2rh_sw_irq_controller, - handle_level_irq, "level"); -} - -static void emma2rh_gpio_irq_enable(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE; - u32 reg; - - reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK); - reg |= 1 << irq; - emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg); -} - -static void emma2rh_gpio_irq_disable(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE; - u32 reg; - - reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK); - reg &= ~(1 << irq); - emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg); -} - -static void emma2rh_gpio_irq_ack(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE; - - emma2rh_out32(EMMA2RH_GPIO_INT_ST, ~(1 << irq)); -} - -static void emma2rh_gpio_irq_mask_ack(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE; - u32 reg; - - emma2rh_out32(EMMA2RH_GPIO_INT_ST, ~(1 << irq)); - - reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK); - reg &= ~(1 << irq); - emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg); -} - -struct irq_chip emma2rh_gpio_irq_controller = { - .name = "emma2rh_gpio_irq", - .irq_ack = emma2rh_gpio_irq_ack, - .irq_mask = emma2rh_gpio_irq_disable, - .irq_mask_ack = emma2rh_gpio_irq_mask_ack, - .irq_unmask = emma2rh_gpio_irq_enable, -}; - -void emma2rh_gpio_irq_init(void) -{ - u32 i; - - for (i = 0; i < NUM_EMMA2RH_IRQ_GPIO; i++) - 
irq_set_chip_and_handler_name(EMMA2RH_GPIO_IRQ_BASE + i, - &emma2rh_gpio_irq_controller, - handle_edge_irq, "edge"); -} - -/* - * the first level int-handler will jump here if it is a emma2rh irq - */ -void emma2rh_irq_dispatch(void) -{ - u32 intStatus; - u32 bitmask; - u32 i; - - intStatus = emma2rh_in32(EMMA2RH_BHIF_INT_ST_0) & - emma2rh_in32(EMMA2RH_BHIF_INT_EN_0); - -#ifdef EMMA2RH_SW_CASCADE - if (intStatus & (1UL << EMMA2RH_SW_CASCADE)) { - u32 swIntStatus; - swIntStatus = emma2rh_in32(EMMA2RH_BHIF_SW_INT) - & emma2rh_in32(EMMA2RH_BHIF_SW_INT_EN); - for (i = 0, bitmask = 1; i < 32; i++, bitmask <<= 1) { - if (swIntStatus & bitmask) { - do_IRQ(EMMA2RH_SW_IRQ_BASE + i); - return; - } - } - } - /* Skip S/W interrupt */ - intStatus &= ~(1UL << EMMA2RH_SW_CASCADE); -#endif - - for (i = 0, bitmask = 1; i < 32; i++, bitmask <<= 1) { - if (intStatus & bitmask) { - do_IRQ(EMMA2RH_IRQ_BASE + i); - return; - } - } - - intStatus = emma2rh_in32(EMMA2RH_BHIF_INT_ST_1) & - emma2rh_in32(EMMA2RH_BHIF_INT_EN_1); - -#ifdef EMMA2RH_GPIO_CASCADE - if (intStatus & (1UL << (EMMA2RH_GPIO_CASCADE % 32))) { - u32 gpioIntStatus; - gpioIntStatus = emma2rh_in32(EMMA2RH_GPIO_INT_ST) - & emma2rh_in32(EMMA2RH_GPIO_INT_MASK); - for (i = 0, bitmask = 1; i < 32; i++, bitmask <<= 1) { - if (gpioIntStatus & bitmask) { - do_IRQ(EMMA2RH_GPIO_IRQ_BASE + i); - return; - } - } - } - /* Skip GPIO interrupt */ - intStatus &= ~(1UL << (EMMA2RH_GPIO_CASCADE % 32)); -#endif - - for (i = 32, bitmask = 1; i < 64; i++, bitmask <<= 1) { - if (intStatus & bitmask) { - do_IRQ(EMMA2RH_IRQ_BASE + i); - return; - } - } - - intStatus = emma2rh_in32(EMMA2RH_BHIF_INT_ST_2) & - emma2rh_in32(EMMA2RH_BHIF_INT_EN_2); - - for (i = 64, bitmask = 1; i < 96; i++, bitmask <<= 1) { - if (intStatus & bitmask) { - do_IRQ(EMMA2RH_IRQ_BASE + i); - return; - } - } -} - -void __init arch_init_irq(void) -{ - u32 reg; - int irq; - - /* by default, interrupts are disabled. 
*/ - emma2rh_out32(EMMA2RH_BHIF_INT_EN_0, 0); - emma2rh_out32(EMMA2RH_BHIF_INT_EN_1, 0); - emma2rh_out32(EMMA2RH_BHIF_INT_EN_2, 0); - emma2rh_out32(EMMA2RH_BHIF_INT1_EN_0, 0); - emma2rh_out32(EMMA2RH_BHIF_INT1_EN_1, 0); - emma2rh_out32(EMMA2RH_BHIF_INT1_EN_2, 0); - emma2rh_out32(EMMA2RH_BHIF_SW_INT_EN, 0); - - clear_c0_status(0xff00); - set_c0_status(0x0400); - -#define GPIO_PCI (0xf<<15) - /* setup GPIO interrupt for PCI interface */ - /* direction input */ - reg = emma2rh_in32(EMMA2RH_GPIO_DIR); - emma2rh_out32(EMMA2RH_GPIO_DIR, reg & ~GPIO_PCI); - /* disable interrupt */ - reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK); - emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg & ~GPIO_PCI); - /* level triggerd */ - reg = emma2rh_in32(EMMA2RH_GPIO_INT_MODE); - emma2rh_out32(EMMA2RH_GPIO_INT_MODE, reg | GPIO_PCI); - reg = emma2rh_in32(EMMA2RH_GPIO_INT_CND_A); - emma2rh_out32(EMMA2RH_GPIO_INT_CND_A, reg & (~GPIO_PCI)); - /* interrupt clear */ - emma2rh_out32(EMMA2RH_GPIO_INT_ST, ~GPIO_PCI); - - /* init all controllers */ - emma2rh_irq_init(); - emma2rh_sw_irq_init(); - emma2rh_gpio_irq_init(); - mips_cpu_irq_init(); - - /* setup cascade interrupts */ - irq = EMMA2RH_IRQ_BASE + EMMA2RH_SW_CASCADE; - if (request_irq(irq, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", irq); - irq = EMMA2RH_IRQ_BASE + EMMA2RH_GPIO_CASCADE; - if (request_irq(irq, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", irq); - irq = MIPS_CPU_IRQ_BASE + 2; - if (request_irq(irq, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", irq); -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; - - if (pending & STATUSF_IP7) - do_IRQ(MIPS_CPU_IRQ_BASE + 7); - else if (pending & STATUSF_IP2) - emma2rh_irq_dispatch(); - else if (pending & STATUSF_IP1) - do_IRQ(MIPS_CPU_IRQ_BASE + 1); - else if (pending & STATUSF_IP0) - do_IRQ(MIPS_CPU_IRQ_BASE + 0); - else - spurious_interrupt(); -} diff --git a/arch/mips/emma/markeins/led.c b/arch/mips/emma/markeins/led.c deleted file mode 100644 index d377542c0ec4..000000000000 --- a/arch/mips/emma/markeins/led.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - */ -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/string.h> -#include <asm/emma/emma2rh.h> - -const unsigned long clear = 0x20202020; - -#define LED_BASE 0xb1400038 - -void markeins_led_clear(void) -{ - emma2rh_out32(LED_BASE, clear); - emma2rh_out32(LED_BASE + 4, clear); -} - -void markeins_led(const char *str) -{ - int i; - int len = strlen(str); - - markeins_led_clear(); - if (len > 8) - len = 8; - - if (emma2rh_in32(0xb0000800) & (0x1 << 18)) - for (i = 0; i < len; i++) - emma2rh_out8(LED_BASE + i, str[i]); - else - for (i = 0; i < len; i++) - emma2rh_out8(LED_BASE + (i & 4) + (3 - (i & 3)), - str[i]); -} - -void markeins_led_hex(u32 val) -{ - char str[10]; - - sprintf(str, "%08x", val); - markeins_led(str); -} diff --git a/arch/mips/emma/markeins/platform.c b/arch/mips/emma/markeins/platform.c deleted file mode 100644 index 97eeb9e8fb2b..000000000000 --- a/arch/mips/emma/markeins/platform.c +++ /dev/null @@ -1,199 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright(C) MontaVista Software Inc, 2006 - * - * Author: dmitry pervushin <dpervushin@ru.mvista.com> - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include 
<linux/types.h> -#include <linux/ioport.h> -#include <linux/serial_8250.h> -#include <linux/mtd/physmap.h> - -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/addrspace.h> -#include <asm/time.h> -#include <asm/bcache.h> -#include <asm/irq.h> -#include <asm/reboot.h> -#include <asm/traps.h> - -#include <asm/emma/emma2rh.h> - - -#define I2C_EMMA2RH "emma2rh-iic" /* must be in sync with IIC driver */ - -static struct resource i2c_emma_resources_0[] = { - { - .name = NULL, - .start = EMMA2RH_IRQ_PIIC0, - .end = EMMA2RH_IRQ_PIIC0, - .flags = IORESOURCE_IRQ - }, { - .name = NULL, - .start = EMMA2RH_PIIC0_BASE, - .end = EMMA2RH_PIIC0_BASE + 0x1000, - .flags = 0 - }, -}; - -struct resource i2c_emma_resources_1[] = { - { - .name = NULL, - .start = EMMA2RH_IRQ_PIIC1, - .end = EMMA2RH_IRQ_PIIC1, - .flags = IORESOURCE_IRQ - }, { - .name = NULL, - .start = EMMA2RH_PIIC1_BASE, - .end = EMMA2RH_PIIC1_BASE + 0x1000, - .flags = 0 - }, -}; - -struct resource i2c_emma_resources_2[] = { - { - .name = NULL, - .start = EMMA2RH_IRQ_PIIC2, - .end = EMMA2RH_IRQ_PIIC2, - .flags = IORESOURCE_IRQ - }, { - .name = NULL, - .start = EMMA2RH_PIIC2_BASE, - .end = EMMA2RH_PIIC2_BASE + 0x1000, - .flags = 0 - }, -}; - -struct platform_device i2c_emma_devices[] = { - [0] = { - .name = I2C_EMMA2RH, - .id = 0, - .resource = i2c_emma_resources_0, - .num_resources = ARRAY_SIZE(i2c_emma_resources_0), - }, - [1] = { - .name = I2C_EMMA2RH, - .id = 1, - .resource = i2c_emma_resources_1, - .num_resources = ARRAY_SIZE(i2c_emma_resources_1), - }, - [2] = { - .name = I2C_EMMA2RH, - .id = 2, - .resource = i2c_emma_resources_2, - .num_resources = ARRAY_SIZE(i2c_emma_resources_2), - }, -}; - -#define EMMA2RH_SERIAL_CLOCK 18544000 -#define EMMA2RH_SERIAL_FLAGS UPF_BOOT_AUTOCONF | UPF_SKIP_TEST - -static struct plat_serial8250_port platform_serial_ports[] = { - [0] = { - .membase= (void __iomem*)KSEG1ADDR(EMMA2RH_PFUR0_BASE + 3), - .mapbase = EMMA2RH_PFUR0_BASE + 3, - .irq = EMMA2RH_IRQ_PFUR0, - .uartclk = EMMA2RH_SERIAL_CLOCK, - .regshift = 4, - .iotype = UPIO_MEM, - .flags = EMMA2RH_SERIAL_FLAGS, - }, [1] = { - .membase = (void __iomem*)KSEG1ADDR(EMMA2RH_PFUR1_BASE + 3), - .mapbase = EMMA2RH_PFUR1_BASE + 3, - .irq = EMMA2RH_IRQ_PFUR1, - .uartclk = EMMA2RH_SERIAL_CLOCK, - .regshift = 4, - .iotype = UPIO_MEM, - .flags = EMMA2RH_SERIAL_FLAGS, - }, [2] = { - .membase = (void __iomem*)KSEG1ADDR(EMMA2RH_PFUR2_BASE + 3), - .mapbase = EMMA2RH_PFUR2_BASE + 3, - .irq = EMMA2RH_IRQ_PFUR2, - .uartclk = EMMA2RH_SERIAL_CLOCK, - .regshift = 4, - .iotype = UPIO_MEM, - .flags = EMMA2RH_SERIAL_FLAGS, - }, [3] = { - .flags = 0, - }, -}; - -static struct platform_device serial_emma = { - .name = "serial8250", - .dev = { - .platform_data = &platform_serial_ports, - }, -}; - -static struct mtd_partition markeins_parts[] = { - [0] = { - .name = "RootFS", - .offset = 0x00000000, - .size = 0x00c00000, - }, - [1] = { - .name = "boot code area", - .offset = MTDPART_OFS_APPEND, - .size = 0x00100000, - }, - [2] = { - .name = "kernel image", - .offset = MTDPART_OFS_APPEND, - .size = 0x00300000, - }, - [3] = { - .name = "RootFS2", - .offset = MTDPART_OFS_APPEND, - .size = 0x00c00000, - }, - [4] = { - .name = "boot code area2", - .offset = MTDPART_OFS_APPEND, - .size = 0x00100000, - }, - [5] = { - .name = "kernel image2", - .offset = MTDPART_OFS_APPEND, - .size = MTDPART_SIZ_FULL, - }, -}; - -static struct physmap_flash_data markeins_flash_data = { - .width = 2, - .nr_parts = ARRAY_SIZE(markeins_parts), - .parts = markeins_parts -}; - -static struct resource 
markeins_flash_resource = { - .start = 0x1e000000, - .end = 0x02000000, - .flags = IORESOURCE_MEM -}; - -static struct platform_device markeins_flash_device = { - .name = "physmap-flash", - .id = 0, - .dev = { - .platform_data = &markeins_flash_data, - }, - .num_resources = 1, - .resource = &markeins_flash_resource, -}; - -static struct platform_device *devices[] = { - i2c_emma_devices, - i2c_emma_devices + 1, - i2c_emma_devices + 2, - &serial_emma, - &markeins_flash_device, -}; - -static int __init platform_devices_setup(void) -{ - return platform_add_devices(devices, ARRAY_SIZE(devices)); -} - -arch_initcall(platform_devices_setup); diff --git a/arch/mips/emma/markeins/setup.c b/arch/mips/emma/markeins/setup.c deleted file mode 100644 index c8a91c2a63bc..000000000000 --- a/arch/mips/emma/markeins/setup.c +++ /dev/null @@ -1,115 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - * - * This file is based on the arch/mips/ddb5xxx/ddb5477/setup.c. - * - * Copyright 2001 MontaVista Software Inc. - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/types.h> - -#include <asm/time.h> -#include <asm/reboot.h> - -#include <asm/emma/emma2rh.h> - -#define USE_CPU_COUNTER_TIMER /* whether we use cpu counter */ - -extern void markeins_led(const char *); - -static int bus_frequency; - -static void markeins_machine_restart(char *command) -{ - static void (*back_to_prom) (void) = (void (*)(void))0xbfc00000; - - printk("cannot EMMA2RH Mark-eins restart.\n"); - markeins_led("restart."); - back_to_prom(); -} - -static void markeins_machine_halt(void) -{ - printk("EMMA2RH Mark-eins halted.\n"); - markeins_led("halted."); - while (1) ; -} - -static void markeins_machine_power_off(void) -{ - markeins_led("poweroff."); - while (1) ; -} - -static unsigned long __initdata emma2rh_clock[4] = { - 166500000, 187312500, 199800000, 210600000 -}; - -static unsigned int __init detect_bus_frequency(unsigned long rtc_base) -{ - u32 reg; - - /* detect from boot strap */ - reg = emma2rh_in32(EMMA2RH_BHIF_STRAP_0); - reg = (reg >> 4) & 0x3; - - return emma2rh_clock[reg]; -} - -void __init plat_time_init(void) -{ - u32 reg; - if (bus_frequency == 0) - bus_frequency = detect_bus_frequency(0); - - reg = emma2rh_in32(EMMA2RH_BHIF_STRAP_0); - if ((reg & 0x3) == 0) - reg = (reg >> 6) & 0x3; - else { - reg = emma2rh_in32(EMMA2RH_BHIF_MAIN_CTRL); - reg = (reg >> 4) & 0x3; - } - mips_hpt_frequency = (bus_frequency * (4 + reg)) / 4 / 2; -} - -static void markeins_board_init(void); -extern void markeins_irq_setup(void); - -static inline void __init markeins_sio_setup(void) -{ -} - -void __init plat_mem_setup(void) -{ - /* initialize board - we don't trust the loader */ - markeins_board_init(); - - set_io_port_base(KSEG1ADDR(EMMA2RH_PCI_IO_BASE)); - - _machine_restart = markeins_machine_restart; - _machine_halt = markeins_machine_halt; - pm_power_off = markeins_machine_power_off; - - /* setup resource limits */ - ioport_resource.start = EMMA2RH_PCI_IO_BASE; - ioport_resource.end = EMMA2RH_PCI_IO_BASE + EMMA2RH_PCI_IO_SIZE - 1; - iomem_resource.start = EMMA2RH_IO_BASE; - iomem_resource.end = EMMA2RH_ROM_BASE - 1; - - markeins_sio_setup(); -} - -static void __init markeins_board_init(void) -{ - u32 val; - - val = emma2rh_in32(EMMA2RH_PBRD_INT_EN); /* open serial interrupts. */ - emma2rh_out32(EMMA2RH_PBRD_INT_EN, val | 0xaa); - val = emma2rh_in32(EMMA2RH_PBRD_CLKSEL); /* set serial clocks. 
*/ - emma2rh_out32(EMMA2RH_PBRD_CLKSEL, val | 0x5); /* 18MHz */ - emma2rh_out32(EMMA2RH_PCI_CONTROL, 0); - - markeins_led("MVL E2RH"); -} diff --git a/arch/mips/fw/cfe/cfe_api.c b/arch/mips/fw/cfe/cfe_api.c index c020b29f561c..0c9c97ab291e 100644 --- a/arch/mips/fw/cfe/cfe_api.c +++ b/arch/mips/fw/cfe/cfe_api.c @@ -243,11 +243,6 @@ int cfe_getfwinfo(cfe_fwinfo_t * info) info->fwi_bootarea_pa = xiocb.plist.xiocb_fwinfo.fwi_bootarea_pa; info->fwi_bootarea_size = xiocb.plist.xiocb_fwinfo.fwi_bootarea_size; -#if 0 - info->fwi_reserved1 = xiocb.plist.xiocb_fwinfo.fwi_reserved1; - info->fwi_reserved2 = xiocb.plist.xiocb_fwinfo.fwi_reserved2; - info->fwi_reserved3 = xiocb.plist.xiocb_fwinfo.fwi_reserved3; -#endif return 0; } diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform index eaa19d189324..53c33cb72974 100644 --- a/arch/mips/generic/Platform +++ b/arch/mips/generic/Platform @@ -8,7 +8,6 @@ # option) any later version. # -platform-$(CONFIG_MIPS_GENERIC) += generic/ cflags-$(CONFIG_MIPS_GENERIC) += -I$(srctree)/arch/mips/include/asm/mach-generic load-$(CONFIG_MIPS_GENERIC) += 0xffffffff80100000 all-$(CONFIG_MIPS_GENERIC) := vmlinux.gz.itb diff --git a/arch/mips/include/asm/asm-eva.h b/arch/mips/include/asm/asm-eva.h index d80be38c4144..e327ebc76753 100644 --- a/arch/mips/include/asm/asm-eva.h +++ b/arch/mips/include/asm/asm-eva.h @@ -180,7 +180,7 @@ #define user_ld(reg, addr) kernel_lw(reg, addr) #else #define user_sd(reg, addr) kernel_sd(reg, addr) -#define user_ld(reg, addr) kernel_sd(reg, addr) +#define user_ld(reg, addr) kernel_ld(reg, addr) #endif /* CONFIG_32BIT */ #endif /* CONFIG_EVA */ diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h index c23527ba65d0..3682d1a0bb80 100644 --- a/arch/mips/include/asm/asm.h +++ b/arch/mips/include/asm/asm.h @@ -74,10 +74,15 @@ symbol: .insn .globl symbol; \ symbol = value -#define PANIC(msg) \ +#define TEXT(msg) \ + .pushsection .data; \ +8: .asciiz msg; \ + .popsection; + +#define ASM_PANIC(msg) \ .set push; \ .set reorder; \ - PTR_LA a0, 8f; \ + PTR_LA a0, 8f; \ jal panic; \ 9: b 9b; \ .set pop; \ @@ -87,22 +92,17 @@ symbol = value * Print formatted string */ #ifdef CONFIG_PRINTK -#define PRINT(string) \ +#define ASM_PRINT(string) \ .set push; \ .set reorder; \ - PTR_LA a0, 8f; \ + PTR_LA a0, 8f; \ jal printk; \ .set pop; \ TEXT(string) #else -#define PRINT(string) +#define ASM_PRINT(string) #endif -#define TEXT(msg) \ - .pushsection .data; \ -8: .asciiz msg; \ - .popsection; - /* * Stack alignment */ @@ -202,7 +202,9 @@ symbol = value #define LONG_SRA sra #define LONG_SRAV srav +#ifdef __ASSEMBLY__ #define LONG .word +#endif #define LONGSIZE 4 #define LONGMASK 3 #define LONGLOG 2 @@ -225,7 +227,9 @@ symbol = value #define LONG_SRA dsra #define LONG_SRAV dsrav +#ifdef __ASSEMBLY__ #define LONG .dword +#endif #define LONGSIZE 8 #define LONGMASK 7 #define LONGLOG 3 diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 655f40ddb6d1..86f2323ebe6b 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -44,7 +44,8 @@ .endm #endif -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_MIPSR6) .macro local_irq_enable reg=t0 ei irq_enable_hazard @@ -54,7 +55,7 @@ di irq_disable_hazard .endm -#else +#else /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR5 && !CONFIG_CPU_MIPSR6 */ .macro local_irq_enable reg=t0 mfc0 \reg, CP0_STATUS ori \reg, \reg, 1 @@ -79,7 +80,7 @@ sw 
\reg, TI_PRE_COUNT($28) #endif .endm -#endif /* CONFIG_CPU_MIPSR2 */ +#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR5 && !CONFIG_CPU_MIPSR6 */ .macro fpu_save_16even thread tmp=t0 .set push @@ -131,7 +132,7 @@ .macro fpu_save_double thread status tmp #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ - defined(CONFIG_CPU_MIPSR6) + defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) sll \tmp, \status, 5 bgez \tmp, 10f fpu_save_16odd \thread @@ -190,7 +191,7 @@ .macro fpu_restore_double thread status tmp #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ - defined(CONFIG_CPU_MIPSR6) + defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) sll \tmp, \status, 5 bgez \tmp, 10f # 16 register mode? @@ -200,16 +201,17 @@ fpu_restore_16even \thread \tmp .endm -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_MIPSR6) .macro _EXT rd, rs, p, s ext \rd, \rs, \p, \s .endm -#else /* !CONFIG_CPU_MIPSR2 || !CONFIG_CPU_MIPSR6 */ +#else /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR5 && !CONFIG_CPU_MIPSR6 */ .macro _EXT rd, rs, p, s srl \rd, \rs, \p andi \rd, \rd, (1 << \s) - 1 .endm -#endif /* !CONFIG_CPU_MIPSR2 || !CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR5 && !CONFIG_CPU_MIPSR6 */ /* * Temporary until all gas have MT ASE support diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h index 61727785a247..c3bd9b2d66e4 100644 --- a/arch/mips/include/asm/bootinfo.h +++ b/arch/mips/include/asm/bootinfo.h @@ -42,17 +42,6 @@ #define MACH_DS5900 10 /* DECsystem 5900 */ /* - * Valid machtype for group PMC-MSP - */ -#define MACH_MSP4200_EVAL 0 /* PMC-Sierra MSP4200 Evaluation */ -#define MACH_MSP4200_GW 1 /* PMC-Sierra MSP4200 Gateway demo */ -#define MACH_MSP4200_FPGA 2 /* PMC-Sierra MSP4200 Emulation */ -#define MACH_MSP7120_EVAL 3 /* PMC-Sierra MSP7120 Evaluation */ -#define MACH_MSP7120_GW 4 /* PMC-Sierra MSP7120 Residential GW */ -#define MACH_MSP7120_FPGA 5 /* PMC-Sierra MSP7120 Emulation */ -#define MACH_MSP_OTHER 255 /* PMC-Sierra unknown board type */ - -/* * Valid machtype for group Mikrotik */ #define MACH_MIKROTIK_RB532 0 /* Mikrotik RouterBoard 532 */ @@ -121,7 +110,7 @@ extern unsigned long fw_passed_dtb; #endif /* - * Platform memory detection hook called by setup_arch + * Platform memory detection hook called by arch_mem_init() */ extern void plat_mem_setup(void); diff --git a/arch/mips/include/asm/branch.h b/arch/mips/include/asm/branch.h index da80878f2c0d..fa3dcbf56fa9 100644 --- a/arch/mips/include/asm/branch.h +++ b/arch/mips/include/asm/branch.h @@ -27,6 +27,9 @@ extern int __MIPS16e_compute_return_epc(struct pt_regs *regs); #define MM_POOL32A_MINOR_SHIFT 0x6 #define MM_MIPS32_COND_FC 0x30 +int isBranchInstr(struct pt_regs *regs, + struct mm_decoded_insn dec_insn, unsigned long *contpc); + extern int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, unsigned long *contpc); diff --git a/arch/mips/include/asm/cacheops.h b/arch/mips/include/asm/cacheops.h index 8031fbc6b69a..50253efecb56 100644 --- a/arch/mips/include/asm/cacheops.h +++ b/arch/mips/include/asm/cacheops.h @@ -48,7 +48,7 @@ * R4000-specific cacheops */ #define Create_Dirty_Excl_D (Cache_D | 0x0c) -#define Fill (Cache_I | 0x14) +#define Fill_I (Cache_I | 0x14) #define Hit_Writeback_I (Cache_I | Hit_Writeback) #define Hit_Writeback_D (Cache_D | Hit_Writeback) diff --git a/arch/mips/include/asm/clock.h b/arch/mips/include/asm/clock.h 
deleted file mode 100644 index 5a8f96ebe5fa..000000000000 --- a/arch/mips/include/asm/clock.h +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MIPS_CLOCK_H -#define __ASM_MIPS_CLOCK_H - -#include <linux/kref.h> -#include <linux/list.h> -#include <linux/seq_file.h> -#include <linux/clk.h> - -struct clk; - -struct clk_ops { - void (*init) (struct clk *clk); - void (*enable) (struct clk *clk); - void (*disable) (struct clk *clk); - void (*recalc) (struct clk *clk); - int (*set_rate) (struct clk *clk, unsigned long rate, int algo_id); - long (*round_rate) (struct clk *clk, unsigned long rate); -}; - -struct clk { - struct list_head node; - const char *name; - int id; - struct module *owner; - - struct clk *parent; - struct clk_ops *ops; - - struct kref kref; - - unsigned long rate; - unsigned long flags; -}; - -#define CLK_ALWAYS_ENABLED (1 << 0) -#define CLK_RATE_PROPAGATES (1 << 1) - -int clk_init(void); - -int __clk_enable(struct clk *); -void __clk_disable(struct clk *); - -void clk_recalc_rate(struct clk *); - -int clk_register(struct clk *); -void clk_unregister(struct clk *); - -#endif /* __ASM_MIPS_CLOCK_H */ diff --git a/arch/mips/include/asm/compiler.h b/arch/mips/include/asm/compiler.h index f77e99f1722e..a2cb2d2b1c07 100644 --- a/arch/mips/include/asm/compiler.h +++ b/arch/mips/include/asm/compiler.h @@ -57,6 +57,11 @@ #define MIPS_ISA_ARCH_LEVEL MIPS_ISA_LEVEL #define MIPS_ISA_LEVEL_RAW mips64r6 #define MIPS_ISA_ARCH_LEVEL_RAW MIPS_ISA_LEVEL_RAW +#elif defined(CONFIG_CPU_MIPSR5) +#define MIPS_ISA_LEVEL "mips64r5" +#define MIPS_ISA_ARCH_LEVEL MIPS_ISA_LEVEL +#define MIPS_ISA_LEVEL_RAW mips64r5 +#define MIPS_ISA_ARCH_LEVEL_RAW MIPS_ISA_LEVEL_RAW #else /* MIPS64 is a superset of MIPS32 */ #define MIPS_ISA_LEVEL "mips64r2" diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index de44c92b1c1f..caecbae4b599 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -284,14 +284,23 @@ #ifndef cpu_has_mips32r2 # define cpu_has_mips32r2 __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M32R2) #endif +#ifndef cpu_has_mips32r5 +# define cpu_has_mips32r5 __isa_range_or_flag(5, 6, MIPS_CPU_ISA_M32R5) +#endif #ifndef cpu_has_mips32r6 # define cpu_has_mips32r6 __isa_ge_or_flag(6, MIPS_CPU_ISA_M32R6) #endif #ifndef cpu_has_mips64r1 -# define cpu_has_mips64r1 __isa_range_or_flag(1, 6, MIPS_CPU_ISA_M64R1) +# define cpu_has_mips64r1 (cpu_has_64bits && \ + __isa_range_or_flag(1, 6, MIPS_CPU_ISA_M64R1)) #endif #ifndef cpu_has_mips64r2 -# define cpu_has_mips64r2 __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M64R2) +# define cpu_has_mips64r2 (cpu_has_64bits && \ + __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M64R2)) +#endif +#ifndef cpu_has_mips64r5 +# define cpu_has_mips64r5 (cpu_has_64bits && \ + __isa_range_or_flag(5, 6, MIPS_CPU_ISA_M64R5)) #endif #ifndef cpu_has_mips64r6 # define cpu_has_mips64r6 __isa_ge_and_flag(6, MIPS_CPU_ISA_M64R6) @@ -313,19 +322,25 @@ (cpu_has_mips_3 | cpu_has_mips_4_5_64_r2_r6) #define cpu_has_mips_4_5_64_r2_r6 \ (cpu_has_mips_4_5 | cpu_has_mips64r1 | \ - cpu_has_mips_r2 | cpu_has_mips_r6) + cpu_has_mips_r2 | cpu_has_mips_r5 | \ + cpu_has_mips_r6) -#define cpu_has_mips32 (cpu_has_mips32r1 | cpu_has_mips32r2 | cpu_has_mips32r6) -#define cpu_has_mips64 (cpu_has_mips64r1 | cpu_has_mips64r2 | cpu_has_mips64r6) +#define cpu_has_mips32 (cpu_has_mips32r1 | cpu_has_mips32r2 | \ + cpu_has_mips32r5 | cpu_has_mips32r6) +#define cpu_has_mips64 (cpu_has_mips64r1 | cpu_has_mips64r2 | \ + 
cpu_has_mips64r5 | cpu_has_mips64r6) #define cpu_has_mips_r1 (cpu_has_mips32r1 | cpu_has_mips64r1) #define cpu_has_mips_r2 (cpu_has_mips32r2 | cpu_has_mips64r2) +#define cpu_has_mips_r5 (cpu_has_mips32r5 | cpu_has_mips64r5) #define cpu_has_mips_r6 (cpu_has_mips32r6 | cpu_has_mips64r6) #define cpu_has_mips_r (cpu_has_mips32r1 | cpu_has_mips32r2 | \ - cpu_has_mips32r6 | cpu_has_mips64r1 | \ - cpu_has_mips64r2 | cpu_has_mips64r6) + cpu_has_mips32r5 | cpu_has_mips32r6 | \ + cpu_has_mips64r1 | cpu_has_mips64r2 | \ + cpu_has_mips64r5 | cpu_has_mips64r6) -/* MIPSR2 and MIPSR6 have a lot of similarities */ -#define cpu_has_mips_r2_r6 (cpu_has_mips_r2 | cpu_has_mips_r6) +/* MIPSR2 - MIPSR6 have a lot of similarities */ +#define cpu_has_mips_r2_r6 (cpu_has_mips_r2 | cpu_has_mips_r5 | \ + cpu_has_mips_r6) /* * cpu_has_mips_r2_exec_hazard - return if IHB is required on current processor @@ -435,9 +450,6 @@ # ifndef cpu_has_64bit_gp_regs # define cpu_has_64bit_gp_regs 0 # endif -# ifndef cpu_has_64bit_addresses -# define cpu_has_64bit_addresses 0 -# endif # ifndef cpu_vmbits # define cpu_vmbits 31 # endif @@ -456,9 +468,6 @@ # ifndef cpu_has_64bit_gp_regs # define cpu_has_64bit_gp_regs 1 # endif -# ifndef cpu_has_64bit_addresses -# define cpu_has_64bit_addresses 1 -# endif # ifndef cpu_vmbits # define cpu_vmbits cpu_data[0].vmbits # define __NEED_VMBITS_PROBE @@ -620,6 +629,14 @@ # endif #endif +#ifndef cpu_has_mm_sysad +# define cpu_has_mm_sysad __opt(MIPS_CPU_MM_SYSAD) +#endif + +#ifndef cpu_has_mm_full +# define cpu_has_mm_full __opt(MIPS_CPU_MM_FULL) +#endif + /* * Guest capabilities */ diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h index ed7ffe4e63a3..a600670d00e9 100644 --- a/arch/mips/include/asm/cpu-info.h +++ b/arch/mips/include/asm/cpu-info.h @@ -105,6 +105,15 @@ struct cpuinfo_mips { unsigned int gtoffset_mask; unsigned int guestid_mask; unsigned int guestid_cache; + +#ifdef CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION + /* CPUCFG data for this CPU, synthesized at probe time. + * + * CPUCFG select 0 is PRId, 4 and above are unimplemented for now. + * So the only stored values are for CPUCFG selects 1-3 inclusive. 
+ */ + u32 loongson3_cpucfg_data[3]; +#endif } __attribute__((aligned(SMP_CACHE_BYTES))); extern struct cpuinfo_mips cpu_data[]; @@ -142,7 +151,7 @@ struct proc_cpuinfo_notifier_args { static inline unsigned int cpu_cluster(struct cpuinfo_mips *cpuinfo) { /* Optimisation for systems where multiple clusters aren't used */ - if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) + if (!IS_ENABLED(CONFIG_CPU_MIPSR5) && !IS_ENABLED(CONFIG_CPU_MIPSR6)) return 0; return (cpuinfo->globalnumber & MIPS_GLOBALNUMBER_CLUSTER) >> diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index 49f0061a6051..75a7a382da09 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -51,13 +51,18 @@ static inline int __pure __get_cpu_type(const int cpu_type) case CPU_M14KEC: case CPU_INTERAPTIV: case CPU_PROAPTIV: - case CPU_P5600: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R5 case CPU_M5150: + case CPU_P5600: #endif #if defined(CONFIG_SYS_HAS_CPU_MIPS32_R2) || \ + defined(CONFIG_SYS_HAS_CPU_MIPS32_R5) || \ defined(CONFIG_SYS_HAS_CPU_MIPS32_R6) || \ defined(CONFIG_SYS_HAS_CPU_MIPS64_R2) || \ + defined(CONFIG_SYS_HAS_CPU_MIPS64_R5) || \ defined(CONFIG_SYS_HAS_CPU_MIPS64_R6) case CPU_QEMU_GENERIC: #endif diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 216a22916740..104a509312b3 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -250,6 +250,10 @@ #define PRID_REV_LOONGSON1C 0x0020 /* Same as Loongson-1B */ #define PRID_REV_LOONGSON2E 0x0002 #define PRID_REV_LOONGSON2F 0x0003 +#define PRID_REV_LOONGSON2K_R1_0 0x0000 +#define PRID_REV_LOONGSON2K_R1_1 0x0001 +#define PRID_REV_LOONGSON2K_R1_2 0x0002 +#define PRID_REV_LOONGSON2K_R1_3 0x0003 #define PRID_REV_LOONGSON3A_R1 0x0005 #define PRID_REV_LOONGSON3B_R1 0x0006 #define PRID_REV_LOONGSON3B_R2 0x0007 @@ -343,14 +347,16 @@ enum cpu_type_enum { #define MIPS_CPU_ISA_M32R2 0x00000020 #define MIPS_CPU_ISA_M64R1 0x00000040 #define MIPS_CPU_ISA_M64R2 0x00000080 -#define MIPS_CPU_ISA_M32R6 0x00000100 -#define MIPS_CPU_ISA_M64R6 0x00000200 +#define MIPS_CPU_ISA_M32R5 0x00000100 +#define MIPS_CPU_ISA_M64R5 0x00000200 +#define MIPS_CPU_ISA_M32R6 0x00000400 +#define MIPS_CPU_ISA_M64R6 0x00000800 #define MIPS_CPU_ISA_32BIT (MIPS_CPU_ISA_II | MIPS_CPU_ISA_M32R1 | \ - MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M32R6) + MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M32R6) #define MIPS_CPU_ISA_64BIT (MIPS_CPU_ISA_III | MIPS_CPU_ISA_IV | \ MIPS_CPU_ISA_V | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2 | \ - MIPS_CPU_ISA_M64R6) + MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M64R6) /* * CPU Option encodings @@ -416,7 +422,9 @@ enum cpu_type_enum { #define MIPS_CPU_MT_PER_TC_PERF_COUNTERS \ BIT_ULL(56) /* CPU has perf counters implemented per TC (MIPSMT ASE) */ #define MIPS_CPU_MMID BIT_ULL(57) /* CPU supports MemoryMapIDs */ -#define MIPS_CPU_MAC_2008_ONLY BIT_ULL(58) /* CPU Only support MAC2008 Fused multiply-add instruction */ +#define MIPS_CPU_MM_SYSAD BIT_ULL(58) /* CPU supports write-through SysAD Valid merge */ +#define MIPS_CPU_MM_FULL BIT_ULL(59) /* CPU supports write-through full merge */ +#define MIPS_CPU_MAC_2008_ONLY BIT_ULL(60) /* CPU Only support MAC2008 Fused multiply-add instruction */ /* * CPU ASE encodings diff --git a/arch/mips/include/asm/emma/emma2rh.h b/arch/mips/include/asm/emma/emma2rh.h deleted file mode 100644 index a25cdb378fe8..000000000000 --- a/arch/mips/include/asm/emma/emma2rh.h +++ /dev/null @@ -1,248 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 
NEC Electronics Corporation 2005-2006 - * - * This file based on include/asm-mips/ddb5xxx/ddb5xxx.h - * Copyright 2001 MontaVista Software Inc. - */ -#ifndef __ASM_EMMA_EMMA2RH_H -#define __ASM_EMMA_EMMA2RH_H - -#include <irq.h> - -/* - * EMMA2RH registers - */ -#define REGBASE 0x10000000 - -#define EMMA2RH_BHIF_STRAP_0 (0x000010+REGBASE) -#define EMMA2RH_BHIF_INT_ST_0 (0x000030+REGBASE) -#define EMMA2RH_BHIF_INT_ST_1 (0x000034+REGBASE) -#define EMMA2RH_BHIF_INT_ST_2 (0x000038+REGBASE) -#define EMMA2RH_BHIF_INT_EN_0 (0x000040+REGBASE) -#define EMMA2RH_BHIF_INT_EN_1 (0x000044+REGBASE) -#define EMMA2RH_BHIF_INT_EN_2 (0x000048+REGBASE) -#define EMMA2RH_BHIF_INT1_EN_0 (0x000050+REGBASE) -#define EMMA2RH_BHIF_INT1_EN_1 (0x000054+REGBASE) -#define EMMA2RH_BHIF_INT1_EN_2 (0x000058+REGBASE) -#define EMMA2RH_BHIF_SW_INT (0x000070+REGBASE) -#define EMMA2RH_BHIF_SW_INT_EN (0x000080+REGBASE) -#define EMMA2RH_BHIF_SW_INT_CLR (0x000090+REGBASE) -#define EMMA2RH_BHIF_MAIN_CTRL (0x0000b4+REGBASE) -#define EMMA2RH_BHIF_EXCEPT_VECT_BASE_ADDRESS (0x0000c0+REGBASE) -#define EMMA2RH_GPIO_DIR (0x110d20+REGBASE) -#define EMMA2RH_GPIO_INT_ST (0x110d30+REGBASE) -#define EMMA2RH_GPIO_INT_MASK (0x110d3c+REGBASE) -#define EMMA2RH_GPIO_INT_MODE (0x110d48+REGBASE) -#define EMMA2RH_GPIO_INT_CND_A (0x110d54+REGBASE) -#define EMMA2RH_GPIO_INT_CND_B (0x110d60+REGBASE) -#define EMMA2RH_PBRD_INT_EN (0x100010+REGBASE) -#define EMMA2RH_PBRD_CLKSEL (0x100028+REGBASE) -#define EMMA2RH_PFUR0_BASE (0x101000+REGBASE) -#define EMMA2RH_PFUR1_BASE (0x102000+REGBASE) -#define EMMA2RH_PFUR2_BASE (0x103000+REGBASE) -#define EMMA2RH_PIIC0_BASE (0x107000+REGBASE) -#define EMMA2RH_PIIC1_BASE (0x108000+REGBASE) -#define EMMA2RH_PIIC2_BASE (0x109000+REGBASE) -#define EMMA2RH_PCI_CONTROL (0x200000+REGBASE) -#define EMMA2RH_PCI_ARBIT_CTR (0x200004+REGBASE) -#define EMMA2RH_PCI_IWIN0_CTR (0x200010+REGBASE) -#define EMMA2RH_PCI_IWIN1_CTR (0x200014+REGBASE) -#define EMMA2RH_PCI_INIT_ESWP (0x200018+REGBASE) -#define EMMA2RH_PCI_INT (0x200020+REGBASE) -#define EMMA2RH_PCI_INT_EN (0x200024+REGBASE) -#define EMMA2RH_PCI_TWIN_CTR (0x200030+REGBASE) -#define EMMA2RH_PCI_TWIN_BADR (0x200034+REGBASE) -#define EMMA2RH_PCI_TWIN0_DADR (0x200038+REGBASE) -#define EMMA2RH_PCI_TWIN1_DADR (0x20003c+REGBASE) - -/* - * Memory map (physical address) - * - * Note most of the following address must be properly aligned by the - * corresponding size. For example, if PCI_IO_SIZE is 16MB, then - * PCI_IO_BASE must be aligned along 16MB boundary. 
- */ - -/* the actual ram size is detected at run-time */ -#define EMMA2RH_RAM_BASE 0x00000000 -#define EMMA2RH_RAM_SIZE 0x10000000 /* less than 256MB */ - -#define EMMA2RH_IO_BASE 0x10000000 -#define EMMA2RH_IO_SIZE 0x01000000 /* 16 MB */ - -#define EMMA2RH_GENERALIO_BASE 0x11000000 -#define EMMA2RH_GENERALIO_SIZE 0x01000000 /* 16 MB */ - -#define EMMA2RH_PCI_IO_BASE 0x12000000 -#define EMMA2RH_PCI_IO_SIZE 0x02000000 /* 32 MB */ - -#define EMMA2RH_PCI_MEM_BASE 0x14000000 -#define EMMA2RH_PCI_MEM_SIZE 0x08000000 /* 128 MB */ - -#define EMMA2RH_ROM_BASE 0x1c000000 -#define EMMA2RH_ROM_SIZE 0x04000000 /* 64 MB */ - -#define EMMA2RH_PCI_CONFIG_BASE EMMA2RH_PCI_IO_BASE -#define EMMA2RH_PCI_CONFIG_SIZE EMMA2RH_PCI_IO_SIZE - -#define NUM_EMMA2RH_IRQ 96 - -#define EMMA2RH_IRQ_BASE (MIPS_CPU_IRQ_BASE + 8) - -/* - * emma2rh irq defs - */ - -#define EMMA2RH_IRQ_INT(n) (EMMA2RH_IRQ_BASE + (n)) - -#define EMMA2RH_IRQ_PFUR0 EMMA2RH_IRQ_INT(49) -#define EMMA2RH_IRQ_PFUR1 EMMA2RH_IRQ_INT(50) -#define EMMA2RH_IRQ_PFUR2 EMMA2RH_IRQ_INT(51) -#define EMMA2RH_IRQ_PIIC0 EMMA2RH_IRQ_INT(56) -#define EMMA2RH_IRQ_PIIC1 EMMA2RH_IRQ_INT(57) -#define EMMA2RH_IRQ_PIIC2 EMMA2RH_IRQ_INT(58) - -/* - * EMMA2RH Register Access - */ - -#define EMMA2RH_BASE (0xa0000000) - -static inline void emma2rh_sync(void) -{ - volatile u32 *p = (volatile u32 *)0xbfc00000; - (void)(*p); -} - -static inline void emma2rh_out32(u32 offset, u32 val) -{ - *(volatile u32 *)(EMMA2RH_BASE | offset) = val; - emma2rh_sync(); -} - -static inline u32 emma2rh_in32(u32 offset) -{ - u32 val = *(volatile u32 *)(EMMA2RH_BASE | offset); - return val; -} - -static inline void emma2rh_out16(u32 offset, u16 val) -{ - *(volatile u16 *)(EMMA2RH_BASE | offset) = val; - emma2rh_sync(); -} - -static inline u16 emma2rh_in16(u32 offset) -{ - u16 val = *(volatile u16 *)(EMMA2RH_BASE | offset); - return val; -} - -static inline void emma2rh_out8(u32 offset, u8 val) -{ - *(volatile u8 *)(EMMA2RH_BASE | offset) = val; - emma2rh_sync(); -} - -static inline u8 emma2rh_in8(u32 offset) -{ - u8 val = *(volatile u8 *)(EMMA2RH_BASE | offset); - return val; -} - -/** - * IIC registers map - **/ - -/*---------------------------------------------------------------------------*/ -/* CNT - Control register (00H R/W) */ -/*---------------------------------------------------------------------------*/ -#define SPT 0x00000001 -#define STT 0x00000002 -#define ACKE 0x00000004 -#define WTIM 0x00000008 -#define SPIE 0x00000010 -#define WREL 0x00000020 -#define LREL 0x00000040 -#define IICE 0x00000080 -#define CNT_RESERVED 0x000000ff /* reserved bit 0 */ - -#define I2C_EMMA_START (IICE | STT) -#define I2C_EMMA_STOP (IICE | SPT) -#define I2C_EMMA_REPSTART I2C_EMMA_START - -/*---------------------------------------------------------------------------*/ -/* STA - Status register (10H Read) */ -/*---------------------------------------------------------------------------*/ -#define MSTS 0x00000080 -#define ALD 0x00000040 -#define EXC 0x00000020 -#define COI 0x00000010 -#define TRC 0x00000008 -#define ACKD 0x00000004 -#define STD 0x00000002 -#define SPD 0x00000001 - -/*---------------------------------------------------------------------------*/ -/* CSEL - Clock select register (20H R/W) */ -/*---------------------------------------------------------------------------*/ -#define FCL 0x00000080 -#define ND50 0x00000040 -#define CLD 0x00000020 -#define DAD 0x00000010 -#define SMC 0x00000008 -#define DFC 0x00000004 -#define CL 0x00000003 -#define CSEL_RESERVED 0x000000ff /* reserved bit 0 */ - 
-#define FAST397 0x0000008b -#define FAST297 0x0000008a -#define FAST347 0x0000000b -#define FAST260 0x0000000a -#define FAST130 0x00000008 -#define STANDARD108 0x00000083 -#define STANDARD83 0x00000082 -#define STANDARD95 0x00000003 -#define STANDARD73 0x00000002 -#define STANDARD36 0x00000001 -#define STANDARD71 0x00000000 - -/*---------------------------------------------------------------------------*/ -/* SVA - Slave address register (30H R/W) */ -/*---------------------------------------------------------------------------*/ -#define SVA 0x000000fe - -/*---------------------------------------------------------------------------*/ -/* SHR - Shift register (40H R/W) */ -/*---------------------------------------------------------------------------*/ -#define SR 0x000000ff - -/*---------------------------------------------------------------------------*/ -/* INT - Interrupt register (50H R/W) */ -/* INTM - Interrupt mask register (60H R/W) */ -/*---------------------------------------------------------------------------*/ -#define INTE0 0x00000001 - -/*********************************************************************** - * I2C registers - *********************************************************************** - */ -#define I2C_EMMA_CNT 0x00 -#define I2C_EMMA_STA 0x10 -#define I2C_EMMA_CSEL 0x20 -#define I2C_EMMA_SVA 0x30 -#define I2C_EMMA_SHR 0x40 -#define I2C_EMMA_INT 0x50 -#define I2C_EMMA_INTM 0x60 - -/* - * include the board dependent part - */ -#ifdef CONFIG_NEC_MARKEINS -#include <asm/emma/markeins.h> -#else -#error "Unknown EMMA2RH board!" -#endif - -#endif /* __ASM_EMMA_EMMA2RH_H */ diff --git a/arch/mips/include/asm/emma/markeins.h b/arch/mips/include/asm/emma/markeins.h deleted file mode 100644 index 2d7e1339d36f..000000000000 --- a/arch/mips/include/asm/emma/markeins.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) NEC Electronics Corporation 2005-2006 - * - * This file based on include/asm-mips/ddb5xxx/ddb5xxx.h - * Copyright 2001 MontaVista Software Inc. 
- */ - -#ifndef MARKEINS_H -#define MARKEINS_H - -#define NUM_EMMA2RH_IRQ_SW 32 -#define NUM_EMMA2RH_IRQ_GPIO 32 - -#define EMMA2RH_SW_CASCADE (EMMA2RH_IRQ_INT(7) - EMMA2RH_IRQ_INT(0)) -#define EMMA2RH_GPIO_CASCADE (EMMA2RH_IRQ_INT(46) - EMMA2RH_IRQ_INT(0)) - -#define EMMA2RH_SW_IRQ_BASE (EMMA2RH_IRQ_BASE + NUM_EMMA2RH_IRQ) -#define EMMA2RH_GPIO_IRQ_BASE (EMMA2RH_SW_IRQ_BASE + NUM_EMMA2RH_IRQ_SW) - -#define EMMA2RH_SW_IRQ_INT(n) (EMMA2RH_SW_IRQ_BASE + (n)) - -#define MARKEINS_PCI_IRQ_INTA EMMA2RH_GPIO_IRQ_BASE+15 -#define MARKEINS_PCI_IRQ_INTB EMMA2RH_GPIO_IRQ_BASE+16 -#define MARKEINS_PCI_IRQ_INTC EMMA2RH_GPIO_IRQ_BASE+17 -#define MARKEINS_PCI_IRQ_INTD EMMA2RH_GPIO_IRQ_BASE+18 - -#endif /* CONFIG_MARKEINS */ diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 9476e0498d59..08f9dd6903b7 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -71,12 +71,12 @@ static inline int __enable_fpu(enum fpu_mode mode) goto fr_common; case FPU_64BIT: -#if !(defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) \ - || defined(CONFIG_64BIT)) +#if !(defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_MIPSR6) || defined(CONFIG_64BIT)) /* we only have a 32-bit FPU */ return SIGFPE; #endif - /* fall through */ + fallthrough; case FPU_32BIT: if (cpu_has_fre) { /* clear FRE */ diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h index bb7c71ffe5b7..f67759e81210 100644 --- a/arch/mips/include/asm/fpu_emulator.h +++ b/arch/mips/include/asm/fpu_emulator.h @@ -172,10 +172,6 @@ void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, struct task_struct *tsk); int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31); -int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, - unsigned long *contpc); -int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, - unsigned long *contpc); /* * Mask the FCSR Cause bits according to the Enable bits, observing diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h index a0b92205f933..f855478d12fa 100644 --- a/arch/mips/include/asm/hazards.h +++ b/arch/mips/include/asm/hazards.h @@ -22,8 +22,9 @@ /* * TLB hazards */ -#if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)) && \ - !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_CPU_LOONGSON64) +#if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_MIPSR6)) && \ + !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_CPU_LOONGSON64) /* * MIPSR2 defines ehb for hazard avoidance @@ -278,7 +279,8 @@ do { \ #define __disable_fpu_hazard -#elif defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) +#elif defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_MIPSR6) #define __enable_fpu_hazard \ ___ehb diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index cf1f2a4a2418..346fffd9e972 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -30,8 +30,6 @@ #include <asm/pgtable-bits.h> #include <asm/processor.h> #include <asm/string.h> - -#include <ioremap.h> #include <mangle-port.h> /* @@ -153,66 +151,9 @@ static inline void *isa_bus_to_virt(unsigned long address) */ #define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) -extern void __iomem * __ioremap(phys_addr_t offset, phys_addr_t size, unsigned long flags); -extern void __iounmap(const volatile void __iomem *addr); - -static inline void 
__iomem * __ioremap_mode(phys_addr_t offset, unsigned long size, - unsigned long flags) -{ - void __iomem *addr = plat_ioremap(offset, size, flags); - - if (addr) - return addr; - -#define __IS_LOW512(addr) (!((phys_addr_t)(addr) & (phys_addr_t) ~0x1fffffffULL)) - - if (cpu_has_64bit_addresses) { - u64 base = UNCAC_BASE; - - /* - * R10000 supports a 2 bit uncached attribute therefore - * UNCAC_BASE may not equal IO_BASE. - */ - if (flags == _CACHE_UNCACHED) - base = (u64) IO_BASE; - return (void __iomem *) (unsigned long) (base + offset); - } else if (__builtin_constant_p(offset) && - __builtin_constant_p(size) && __builtin_constant_p(flags)) { - phys_addr_t phys_addr, last_addr; - - phys_addr = fixup_bigphys_addr(offset, size); - - /* Don't allow wraparound or zero size. */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - - /* - * Map uncached objects in the low 512MB of address - * space using KSEG1. - */ - if (__IS_LOW512(phys_addr) && __IS_LOW512(last_addr) && - flags == _CACHE_UNCACHED) - return (void __iomem *) - (unsigned long)CKSEG1ADDR(phys_addr); - } - - return __ioremap(offset, size, flags); - -#undef __IS_LOW512 -} - -/* - * ioremap_prot - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - - * ioremap_prot gives the caller control over cache coherency attributes (CCA) - */ -static inline void __iomem *ioremap_prot(phys_addr_t offset, - unsigned long size, unsigned long prot_val) { - return __ioremap_mode(offset, size, prot_val & _CACHE_MASK); -} +void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, + unsigned long prot_val); +void iounmap(const volatile void __iomem *addr); /* * ioremap - map bus memory into CPU space @@ -226,7 +167,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, * address. */ #define ioremap(offset, size) \ - __ioremap_mode((offset), (size), _CACHE_UNCACHED) + ioremap_prot((offset), (size), _CACHE_UNCACHED) #define ioremap_uc ioremap /* @@ -245,7 +186,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, * memory-like regions on I/O busses. */ #define ioremap_cache(offset, size) \ - __ioremap_mode((offset), (size), _page_cachable_default) + ioremap_prot((offset), (size), _page_cachable_default) /* * ioremap_wc - map bus memory into CPU space @@ -266,23 +207,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, * _CACHE_UNCACHED option (see cpu_probe() method). 
*/ #define ioremap_wc(offset, size) \ - __ioremap_mode((offset), (size), boot_cpu_data.writecombine) - -static inline void iounmap(const volatile void __iomem *addr) -{ - if (plat_iounmap(addr)) - return; - -#define __IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1) - - if (cpu_has_64bit_addresses || - (__builtin_constant_p(addr) && __IS_KSEG1(addr))) - return; - - __iounmap(addr); - -#undef __IS_KSEG1 -} + ioremap_prot((offset), (size), boot_cpu_data.writecombine) #if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON64) #define war_io_reorder_wmb() wmb() diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 2c343c346b79..e28b5a946e26 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -174,6 +174,8 @@ struct kvm_vcpu_stat { #endif u64 halt_successful_poll; u64 halt_attempted_poll; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; u64 halt_poll_invalid; u64 halt_wakeup; }; diff --git a/arch/mips/include/asm/lasat/ds1603.h b/arch/mips/include/asm/lasat/ds1603.h deleted file mode 100644 index ab833be9637d..000000000000 --- a/arch/mips/include/asm/lasat/ds1603.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <asm/addrspace.h> - -/* Lasat 100 */ -#define DS1603_REG_100 (KSEG1ADDR(0x1c810000)) -#define DS1603_RST_100 (1 << 2) -#define DS1603_CLK_100 (1 << 0) -#define DS1603_DATA_SHIFT_100 1 -#define DS1603_DATA_100 (1 << DS1603_DATA_SHIFT_100) - -/* Lasat 200 */ -#define DS1603_REG_200 (KSEG1ADDR(0x11000000)) -#define DS1603_RST_200 (1 << 3) -#define DS1603_CLK_200 (1 << 4) -#define DS1603_DATA_200 (1 << 5) - -#define DS1603_DATA_REG_200 (DS1603_REG_200 + 0x10000) -#define DS1603_DATA_READ_SHIFT_200 9 -#define DS1603_DATA_READ_200 (1 << DS1603_DATA_READ_SHIFT_200) diff --git a/arch/mips/include/asm/lasat/eeprom.h b/arch/mips/include/asm/lasat/eeprom.h deleted file mode 100644 index 24001a5cbb11..000000000000 --- a/arch/mips/include/asm/lasat/eeprom.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <asm/addrspace.h> - -/* lasat 100 */ -#define AT93C_REG_100 KSEG1ADDR(0x1c810000) -#define AT93C_RDATA_REG_100 AT93C_REG_100 -#define AT93C_RDATA_SHIFT_100 4 -#define AT93C_WDATA_SHIFT_100 4 -#define AT93C_CS_M_100 (1 << 5) -#define AT93C_CLK_M_100 (1 << 3) - -/* lasat 200 */ -#define AT93C_REG_200 KSEG1ADDR(0x11000000) -#define AT93C_RDATA_REG_200 (AT93C_REG_200+0x10000) -#define AT93C_RDATA_SHIFT_200 8 -#define AT93C_WDATA_SHIFT_200 2 -#define AT93C_CS_M_200 (1 << 0) -#define AT93C_CLK_M_200 (1 << 1) diff --git a/arch/mips/include/asm/lasat/head.h b/arch/mips/include/asm/lasat/head.h deleted file mode 100644 index 20b0ecedd4b5..000000000000 --- a/arch/mips/include/asm/lasat/head.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Image header stuff - */ -#ifndef _HEAD_H -#define _HEAD_H - -#define LASAT_K_MAGIC0_VAL 0xfedeabba -#define LASAT_K_MAGIC1_VAL 0x00bedead - -#ifndef _LANGUAGE_ASSEMBLY -#include <linux/types.h> -struct bootloader_header { - u32 magic[2]; - u32 version; - u32 image_start; - u32 image_size; - u32 kernel_start; - u32 kernel_entry; -}; -#endif - -#endif /* _HEAD_H */ diff --git a/arch/mips/include/asm/lasat/lasat.h b/arch/mips/include/asm/lasat/lasat.h deleted file mode 100644 index 483be606960d..000000000000 --- a/arch/mips/include/asm/lasat/lasat.h +++ /dev/null @@ -1,245 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * lasat.h - * - * Thomas Horsten <thh@lasat.com> 
- * Copyright (C) 2000 LASAT Networks A/S. - * - * Configuration for LASAT boards, loads the appropriate include files. - */ -#ifndef _LASAT_H -#define _LASAT_H - -#ifndef _LANGUAGE_ASSEMBLY - -extern struct lasat_misc { - volatile u32 *reset_reg; - volatile u32 *flash_wp_reg; - u32 flash_wp_bit; -} *lasat_misc; - -enum lasat_mtdparts { - LASAT_MTD_BOOTLOADER, - LASAT_MTD_SERVICE, - LASAT_MTD_NORMAL, - LASAT_MTD_CONFIG, - LASAT_MTD_FS, - LASAT_MTD_LAST -}; - -/* - * The format of the data record in the EEPROM. - * See the LASAT Hardware Configuration field specification for a detailed - * description of the config field. - */ -#include <linux/types.h> - -#define LASAT_EEPROM_VERSION 7 -struct lasat_eeprom_struct { - unsigned int version; - unsigned int cfg[3]; - unsigned char hwaddr[6]; - unsigned char print_partno[12]; - unsigned char term0; - unsigned char print_serial[14]; - unsigned char term1; - unsigned char prod_partno[12]; - unsigned char term2; - unsigned char prod_serial[14]; - unsigned char term3; - unsigned char passwd_hash[16]; - unsigned char pwdnull; - unsigned char vendid; - unsigned char ts_ref; - unsigned char ts_signoff; - unsigned char reserved[11]; - unsigned char debugaccess; - unsigned short prid; - unsigned int serviceflag; - unsigned int ipaddr; - unsigned int netmask; - unsigned int crc32; -}; - -struct lasat_eeprom_struct_pre7 { - unsigned int version; - unsigned int flags[3]; - unsigned char hwaddr0[6]; - unsigned char hwaddr1[6]; - unsigned char print_partno[9]; - unsigned char term0; - unsigned char print_serial[14]; - unsigned char term1; - unsigned char prod_partno[9]; - unsigned char term2; - unsigned char prod_serial[14]; - unsigned char term3; - unsigned char passwd_hash[24]; - unsigned char pwdnull; - unsigned char vendor; - unsigned char ts_ref; - unsigned char ts_signoff; - unsigned char reserved[6]; - unsigned int writecount; - unsigned int ipaddr; - unsigned int netmask; - unsigned int crc32; -}; - -/* Configuration descriptor encoding - see the doc for details */ - -#define LASAT_W0_DSCTYPE(v) (((v)) & 0xf) -#define LASAT_W0_BMID(v) (((v) >> 0x04) & 0xf) -#define LASAT_W0_CPUTYPE(v) (((v) >> 0x08) & 0xf) -#define LASAT_W0_BUSSPEED(v) (((v) >> 0x0c) & 0xf) -#define LASAT_W0_CPUCLK(v) (((v) >> 0x10) & 0xf) -#define LASAT_W0_SDRAMBANKSZ(v) (((v) >> 0x14) & 0xf) -#define LASAT_W0_SDRAMBANKS(v) (((v) >> 0x18) & 0xf) -#define LASAT_W0_L2CACHE(v) (((v) >> 0x1c) & 0xf) - -#define LASAT_W1_EDHAC(v) (((v)) & 0xf) -#define LASAT_W1_HIFN(v) (((v) >> 0x04) & 0x1) -#define LASAT_W1_ISDN(v) (((v) >> 0x05) & 0x1) -#define LASAT_W1_IDE(v) (((v) >> 0x06) & 0x1) -#define LASAT_W1_HDLC(v) (((v) >> 0x07) & 0x1) -#define LASAT_W1_USVERSION(v) (((v) >> 0x08) & 0x1) -#define LASAT_W1_4MACS(v) (((v) >> 0x09) & 0x1) -#define LASAT_W1_EXTSERIAL(v) (((v) >> 0x0a) & 0x1) -#define LASAT_W1_FLASHSIZE(v) (((v) >> 0x0c) & 0xf) -#define LASAT_W1_PCISLOTS(v) (((v) >> 0x10) & 0xf) -#define LASAT_W1_PCI1OPT(v) (((v) >> 0x14) & 0xf) -#define LASAT_W1_PCI2OPT(v) (((v) >> 0x18) & 0xf) -#define LASAT_W1_PCI3OPT(v) (((v) >> 0x1c) & 0xf) - -/* Routines specific to LASAT boards */ - -#define LASAT_BMID_MASQUERADE2 0 -#define LASAT_BMID_MASQUERADEPRO 1 -#define LASAT_BMID_SAFEPIPE25 2 -#define LASAT_BMID_SAFEPIPE50 3 -#define LASAT_BMID_SAFEPIPE100 4 -#define LASAT_BMID_SAFEPIPE5000 5 -#define LASAT_BMID_SAFEPIPE7000 6 -#define LASAT_BMID_SAFEPIPE1000 7 -#if 0 -#define LASAT_BMID_SAFEPIPE30 7 -#define LASAT_BMID_SAFEPIPE5100 8 -#define LASAT_BMID_SAFEPIPE7100 9 -#endif -#define 
LASAT_BMID_UNKNOWN 0xf -#define LASAT_MAX_BMID_NAMES 9 /* no larger than 15! */ - -#define LASAT_HAS_EDHAC (1 << 0) -#define LASAT_EDHAC_FAST (1 << 1) -#define LASAT_HAS_EADI (1 << 2) -#define LASAT_HAS_HIFN (1 << 3) -#define LASAT_HAS_ISDN (1 << 4) -#define LASAT_HAS_LEASEDLINE_IF (1 << 5) -#define LASAT_HAS_HDC (1 << 6) - -#define LASAT_PRID_MASQUERADE2 0 -#define LASAT_PRID_MASQUERADEPRO 1 -#define LASAT_PRID_SAFEPIPE25 2 -#define LASAT_PRID_SAFEPIPE50 3 -#define LASAT_PRID_SAFEPIPE100 4 -#define LASAT_PRID_SAFEPIPE5000 5 -#define LASAT_PRID_SAFEPIPE7000 6 -#define LASAT_PRID_SAFEPIPE30 7 -#define LASAT_PRID_SAFEPIPE5100 8 -#define LASAT_PRID_SAFEPIPE7100 9 - -#define LASAT_PRID_SAFEPIPE1110 10 -#define LASAT_PRID_SAFEPIPE3020 11 -#define LASAT_PRID_SAFEPIPE3030 12 -#define LASAT_PRID_SAFEPIPE5020 13 -#define LASAT_PRID_SAFEPIPE5030 14 -#define LASAT_PRID_SAFEPIPE1120 15 -#define LASAT_PRID_SAFEPIPE1130 16 -#define LASAT_PRID_SAFEPIPE6010 17 -#define LASAT_PRID_SAFEPIPE6110 18 -#define LASAT_PRID_SAFEPIPE6210 19 -#define LASAT_PRID_SAFEPIPE1020 20 -#define LASAT_PRID_SAFEPIPE1040 21 -#define LASAT_PRID_SAFEPIPE1060 22 - -struct lasat_info { - unsigned int li_cpu_hz; - unsigned int li_bus_hz; - unsigned int li_bmid; - unsigned int li_memsize; - unsigned int li_flash_size; - unsigned int li_prid; - unsigned char li_bmstr[16]; - unsigned char li_namestr[32]; - unsigned char li_typestr[16]; - /* Info on the Flash layout */ - unsigned int li_flash_base; - unsigned long li_flashpart_base[LASAT_MTD_LAST]; - unsigned long li_flashpart_size[LASAT_MTD_LAST]; - struct lasat_eeprom_struct li_eeprom_info; - unsigned int li_eeprom_upgrade_version; - unsigned int li_debugaccess; -}; - -extern struct lasat_info lasat_board_info; - -static inline unsigned long lasat_flash_partition_start(int partno) -{ - if (partno < 0 || partno >= LASAT_MTD_LAST) - return 0; - - return lasat_board_info.li_flashpart_base[partno]; -} - -static inline unsigned long lasat_flash_partition_size(int partno) -{ - if (partno < 0 || partno >= LASAT_MTD_LAST) - return 0; - - return lasat_board_info.li_flashpart_size[partno]; -} - -/* Called from setup() to initialize the global board_info struct */ -extern int lasat_init_board_info(void); - -/* Write the modified EEPROM info struct */ -extern void lasat_write_eeprom_info(void); - -#define N_MACHTYPES 2 -/* for calibration of delays */ - -/* the lasat_ndelay function is necessary because it is used at an - * early stage of the boot process where ndelay is not calibrated. 
- * It is used for the bit-banging rtc and eeprom drivers */ - -#include <linux/delay.h> -#include <linux/smp.h> - -/* calculating with the slowest board with 100 MHz clock */ -#define LASAT_100_DIVIDER 20 -/* All 200's run at 250 MHz clock */ -#define LASAT_200_DIVIDER 8 - -extern unsigned int lasat_ndelay_divider; - -static inline void lasat_ndelay(unsigned int ns) -{ - __delay(ns / lasat_ndelay_divider); -} - -#define IS_LASAT_200() (current_cpu_data.cputype == CPU_R5000) - -#endif /* !defined (_LANGUAGE_ASSEMBLY) */ - -#define LASAT_SERVICEMODE_MAGIC_1 0xdeadbeef -#define LASAT_SERVICEMODE_MAGIC_2 0xfedeabba - -/* Lasat 100 boards */ -#define LASAT_GT_BASE (KSEG1ADDR(0x14000000)) - -/* Lasat 200 boards */ -#define Vrc5074_PHYS_BASE 0x1fa00000 -#define Vrc5074_BASE (KSEG1ADDR(Vrc5074_PHYS_BASE)) -#define PCI_WINDOW1 0x1a000000 - -#endif /* _LASAT_H */ diff --git a/arch/mips/include/asm/lasat/lasatint.h b/arch/mips/include/asm/lasat/lasatint.h deleted file mode 100644 index b2b346e0ca38..000000000000 --- a/arch/mips/include/asm/lasat/lasatint.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_LASAT_LASATINT_H -#define __ASM_LASAT_LASATINT_H - -/* lasat 100 */ -#define LASAT_INT_STATUS_REG_100 (KSEG1ADDR(0x1c880000)) -#define LASAT_INT_MASK_REG_100 (KSEG1ADDR(0x1c890000)) -#define LASATINT_MASK_SHIFT_100 0 - -/* lasat 200 */ -#define LASAT_INT_STATUS_REG_200 (KSEG1ADDR(0x1104003c)) -#define LASAT_INT_MASK_REG_200 (KSEG1ADDR(0x1104003c)) -#define LASATINT_MASK_SHIFT_200 16 - -#endif /* __ASM_LASAT_LASATINT_H */ diff --git a/arch/mips/include/asm/lasat/picvue.h b/arch/mips/include/asm/lasat/picvue.h deleted file mode 100644 index 99987c5a4b83..000000000000 --- a/arch/mips/include/asm/lasat/picvue.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Lasat 100 */ -#define PVC_REG_100 KSEG1ADDR(0x1c820000) -#define PVC_DATA_SHIFT_100 0 -#define PVC_DATA_M_100 0xFF -#define PVC_E_100 (1 << 8) -#define PVC_RW_100 (1 << 9) -#define PVC_RS_100 (1 << 10) - -/* Lasat 200 */ -#define PVC_REG_200 KSEG1ADDR(0x11000000) -#define PVC_DATA_SHIFT_200 24 -#define PVC_DATA_M_200 (0xFF << PVC_DATA_SHIFT_200) -#define PVC_E_200 (1 << 16) -#define PVC_RW_200 (1 << 17) -#define PVC_RS_200 (1 << 18) diff --git a/arch/mips/include/asm/lasat/serial.h b/arch/mips/include/asm/lasat/serial.h deleted file mode 100644 index 7b43d74089d1..000000000000 --- a/arch/mips/include/asm/lasat/serial.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <asm/lasat/lasat.h> - -/* Lasat 100 boards serial configuration */ -#define LASAT_BASE_BAUD_100 (7372800 / 16) -#define LASAT_UART_REGS_BASE_100 0x1c8b0000 -#define LASAT_UART_REGS_SHIFT_100 2 -#define LASATINT_UART_100 16 - -/* * LASAT 200 boards serial configuration */ -#define LASAT_BASE_BAUD_200 (100000000 / 16 / 12) -#define LASAT_UART_REGS_BASE_200 (Vrc5074_PHYS_BASE + 0x0300) -#define LASAT_UART_REGS_SHIFT_200 3 -#define LASATINT_UART_200 21 diff --git a/arch/mips/include/asm/maar.h b/arch/mips/include/asm/maar.h index 6908b93c4ff9..99f1c3e4b11f 100644 --- a/arch/mips/include/asm/maar.h +++ b/arch/mips/include/asm/maar.h @@ -32,7 +32,7 @@ unsigned platform_maar_init(unsigned num_pairs); * @upper: The highest address that the MAAR pair will affect. Must be * aligned to one byte before a 2^16 byte boundary. * @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The - * MIPS_MAAR_VL attribute will automatically be set. 
+ * MIPS_MAAR_VL/MIPS_MAAR_VH attributes will automatically be set. * * Program the pair of MAAR registers specified by idx to apply the attributes * specified by attrs to the range of addresses from lower to higher. @@ -48,17 +48,30 @@ static inline void write_maar_pair(unsigned idx, phys_addr_t lower, /* Automatically set MIPS_MAAR_VL */ attrs |= MIPS_MAAR_VL; - /* Write the upper address & attributes (only MIPS_MAAR_VL matters) */ + /* + * Write the upper address & attributes (both MIPS_MAAR_VL and + * MIPS_MAAR_VH matter) + */ write_c0_maari(idx << 1); back_to_back_c0_hazard(); write_c0_maar(((upper >> 4) & MIPS_MAAR_ADDR) | attrs); back_to_back_c0_hazard(); +#ifdef CONFIG_XPA + upper >>= MIPS_MAARX_ADDR_SHIFT; + writex_c0_maar(((upper >> 4) & MIPS_MAARX_ADDR) | MIPS_MAARX_VH); + back_to_back_c0_hazard(); +#endif /* Write the lower address & attributes */ write_c0_maari((idx << 1) | 0x1); back_to_back_c0_hazard(); write_c0_maar((lower >> 4) | attrs); back_to_back_c0_hazard(); +#ifdef CONFIG_XPA + lower >>= MIPS_MAARX_ADDR_SHIFT; + writex_c0_maar(((lower >> 4) & MIPS_MAARX_ADDR) | MIPS_MAARX_VH); + back_to_back_c0_hazard(); +#endif } /** diff --git a/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h index 95a0b580909d..a54f20d956a2 100644 --- a/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h @@ -56,6 +56,5 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #endif /* __ASM_MACH_ATH25_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h index e7c972fccd9f..79ab3ad9fee8 100644 --- a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h @@ -45,7 +45,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 diff --git a/arch/mips/include/asm/mach-au1x00/ioremap.h b/arch/mips/include/asm/mach-au1x00/ioremap.h deleted file mode 100644 index f6877ed8b8d0..000000000000 --- a/arch/mips/include/asm/mach-au1x00/ioremap.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * include/asm-mips/mach-au1x00/ioremap.h - */ -#ifndef __ASM_MACH_AU1X00_IOREMAP_H -#define __ASM_MACH_AU1X00_IOREMAP_H - -#include <linux/types.h> - -#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_PCI) -extern phys_addr_t __fixup_bigphys_addr(phys_addr_t, phys_addr_t); -#else -static inline phys_addr_t __fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return phys_addr; -} -#endif - -/* - * Allow physical addresses to be fixed up to help 36-bit peripherals. 
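/*
 * A minimal usage sketch for the write_maar_pair() hunk in asm/maar.h
 * above; the pair index, address bounds and attribute below are
 * illustrative assumptions, not taken from the patch.  With CONFIG_XPA
 * the helper now also programs the MAARX high-address bits and the
 * MIPS_MAAR_VH valid bit through writex_c0_maar().
 */
write_maar_pair(0, 0x00000000, 0x0fffffff, MIPS_MAAR_S);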
- */ -static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return __fixup_bigphys_addr(phys_addr, size); -} - -static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, - unsigned long flags) -{ - return NULL; -} - -static inline int plat_iounmap(const volatile void __iomem *addr) -{ - return 0; -} - -#endif /* __ASM_MACH_AU1X00_IOREMAP_H */ diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h index 8fe88c2251e4..9212429d5edd 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h @@ -13,16 +13,16 @@ static inline unsigned long bcm63xx_gpio_count(void) case BCM6328_CPU_ID: return 32; case BCM3368_CPU_ID: - case BCM6358_CPU_ID: return 40; case BCM6338_CPU_ID: return 8; case BCM6345_CPU_ID: return 16; - case BCM6362_CPU_ID: - return 48; + case BCM6358_CPU_ID: case BCM6368_CPU_ID: return 38; + case BCM6362_CPU_ID: + return 48; case BCM6348_CPU_ID: default: return 37; diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h index bc3444cd4ef2..9ceb5e72889f 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h @@ -1367,8 +1367,8 @@ #define MISC_STRAPBUS_6328_REG 0x240 #define STRAPBUS_6328_FCVO_SHIFT 7 #define STRAPBUS_6328_FCVO_MASK (0x1f << STRAPBUS_6328_FCVO_SHIFT) -#define STRAPBUS_6328_BOOT_SEL_SERIAL (1 << 28) -#define STRAPBUS_6328_BOOT_SEL_NAND (0 << 28) +#define STRAPBUS_6328_BOOT_SEL_SERIAL (1 << 18) +#define STRAPBUS_6328_BOOT_SEL_NAND (0 << 18) /************************************************************************* * _REG relative to RSET_PCIE diff --git a/arch/mips/include/asm/mach-bcm63xx/ioremap.h b/arch/mips/include/asm/mach-bcm63xx/ioremap.h index 8cd261ec0a75..73f31825bbf3 100644 --- a/arch/mips/include/asm/mach-bcm63xx/ioremap.h +++ b/arch/mips/include/asm/mach-bcm63xx/ioremap.h @@ -4,11 +4,6 @@ #include <bcm63xx_cpu.h> -static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return phys_addr; -} - static inline int is_bcm63xx_internal_registers(phys_addr_t offset) { switch (bcm63xx_get_cpu_id()) { diff --git a/arch/mips/include/asm/mach-bmips/ioremap.h b/arch/mips/include/asm/mach-bmips/ioremap.h index 52632ebc705f..63b4af9916b6 100644 --- a/arch/mips/include/asm/mach-bmips/ioremap.h +++ b/arch/mips/include/asm/mach-bmips/ioremap.h @@ -4,11 +4,6 @@ #include <linux/types.h> -static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return phys_addr; -} - static inline int is_bmips_internal_registers(phys_addr_t offset) { if (offset >= 0xfff80000) diff --git a/arch/mips/include/asm/mach-emma2rh/irq.h b/arch/mips/include/asm/mach-emma2rh/irq.h deleted file mode 100644 index d32736736bb3..000000000000 --- a/arch/mips/include/asm/mach-emma2rh/irq.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
- * - * Copyright (C) 2003 by Ralf Baechle - */ -#ifndef __ASM_MACH_EMMA2RH_IRQ_H -#define __ASM_MACH_EMMA2RH_IRQ_H - -#define NR_IRQS 256 - -#include <asm/mach-generic/irq.h> - -#endif /* __ASM_MACH_EMMA2RH_IRQ_H */ diff --git a/arch/mips/include/asm/mach-generic/ioremap.h b/arch/mips/include/asm/mach-generic/ioremap.h index 4e36ea25ed33..f2442b84545c 100644 --- a/arch/mips/include/asm/mach-generic/ioremap.h +++ b/arch/mips/include/asm/mach-generic/ioremap.h @@ -7,15 +7,6 @@ #include <linux/types.h> -/* - * Allow physical addresses to be fixed up to help peripherals located - * outside the low 32-bit range -- generic pass-through version. - */ -static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return phys_addr; -} - static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, unsigned long flags) { diff --git a/arch/mips/include/asm/mach-generic/irq.h b/arch/mips/include/asm/mach-generic/irq.h index be546a0f65fa..72ac2c202c55 100644 --- a/arch/mips/include/asm/mach-generic/irq.h +++ b/arch/mips/include/asm/mach-generic/irq.h @@ -36,10 +36,4 @@ #endif /* CONFIG_IRQ_MIPS_CPU */ -#ifdef CONFIG_MIPS_GIC -#ifndef MIPS_GIC_IRQ_BASE -#define MIPS_GIC_IRQ_BASE (MIPS_CPU_IRQ_BASE + 8) -#endif -#endif /* CONFIG_MIPS_GIC */ - #endif /* __ASM_MACH_GENERIC_IRQ_H */ diff --git a/arch/mips/include/asm/mach-ip27/spaces.h b/arch/mips/include/asm/mach-ip27/spaces.h index 24d5e31bcfa6..66421e9a6aa6 100644 --- a/arch/mips/include/asm/mach-ip27/spaces.h +++ b/arch/mips/include/asm/mach-ip27/spaces.h @@ -10,17 +10,19 @@ #ifndef _ASM_MACH_IP27_SPACES_H #define _ASM_MACH_IP27_SPACES_H +#include <linux/const.h> + /* * IP27 uses the R10000's uncached attribute feature. Attribute 3 selects * uncached memory addressing. Hide the definitions on 32-bit compilation * of the compat-vdso code. 
*/ #ifdef CONFIG_64BIT -#define HSPEC_BASE 0x9000000000000000 -#define IO_BASE 0x9200000000000000 -#define MSPEC_BASE 0x9400000000000000 -#define UNCAC_BASE 0x9600000000000000 -#define CAC_BASE 0xa800000000000000 +#define HSPEC_BASE _AC(0x9000000000000000, UL) +#define IO_BASE _AC(0x9200000000000000, UL) +#define MSPEC_BASE _AC(0x9400000000000000, UL) +#define UNCAC_BASE _AC(0x9600000000000000, UL) +#define CAC_BASE _AC(0xa800000000000000, UL) #endif #define TO_MSPEC(x) (MSPEC_BASE | ((x) & TO_PHYS_MASK)) diff --git a/arch/mips/include/asm/mach-ip30/war.h b/arch/mips/include/asm/mach-ip30/war.h index a98ba204f183..a1fa0c1f5300 100644 --- a/arch/mips/include/asm/mach-ip30/war.h +++ b/arch/mips/include/asm/mach-ip30/war.h @@ -8,19 +8,17 @@ #define R4600_V1_INDEX_ICACHEOP_WAR 0 #define R4600_V1_HIT_CACHEOP_WAR 0 #define R4600_V2_HIT_CACHEOP_WAR 0 -#define MIPS_CACHE_SYNC_WAR 0 #define BCM1250_M3_WAR 0 #define SIBYTE_1956_WAR 0 #define MIPS4K_ICACHE_REFILL_WAR 0 -#define MIPS34K_MISSED_ITLB_WAR 0 -#define R5432_CP0_INTERRUPT_WAR 0 +#define MIPS_CACHE_SYNC_WAR 0 #define TX49XX_ICACHE_INDEX_INV_WAR 0 #define ICACHE_REFILLS_WORKAROUND_WAR 0 - #ifdef CONFIG_CPU_R10000 #define R10000_LLSC_WAR 1 #else #define R10000_LLSC_WAR 0 #endif +#define MIPS34K_MISSED_ITLB_WAR 0 #endif /* __ASM_MIPS_MACH_IP30_WAR_H */ diff --git a/arch/mips/include/asm/mach-jz4740/base.h b/arch/mips/include/asm/mach-jz4740/base.h deleted file mode 100644 index 96b2d6674cdb..000000000000 --- a/arch/mips/include/asm/mach-jz4740/base.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_JZ4740_BASE_H__ -#define __ASM_MACH_JZ4740_BASE_H__ - -#define JZ4740_CPM_BASE_ADDR 0x10000000 -#define JZ4740_INTC_BASE_ADDR 0x10001000 -#define JZ4740_WDT_BASE_ADDR 0x10002000 -#define JZ4740_TCU_BASE_ADDR 0x10002010 -#define JZ4740_RTC_BASE_ADDR 0x10003000 -#define JZ4740_GPIO_BASE_ADDR 0x10010000 -#define JZ4740_AIC_BASE_ADDR 0x10020000 -#define JZ4740_MSC_BASE_ADDR 0x10021000 -#define JZ4740_UART0_BASE_ADDR 0x10030000 -#define JZ4740_UART1_BASE_ADDR 0x10031000 -#define JZ4740_I2C_BASE_ADDR 0x10042000 -#define JZ4740_SSI_BASE_ADDR 0x10043000 -#define JZ4740_SADC_BASE_ADDR 0x10070000 -#define JZ4740_EMC_BASE_ADDR 0x13010000 -#define JZ4740_DMAC_BASE_ADDR 0x13020000 -#define JZ4740_UHC_BASE_ADDR 0x13030000 -#define JZ4740_UDC_BASE_ADDR 0x13040000 -#define JZ4740_LCD_BASE_ADDR 0x13050000 -#define JZ4740_SLCD_BASE_ADDR 0x13050000 -#define JZ4740_CIM_BASE_ADDR 0x13060000 -#define JZ4740_IPU_BASE_ADDR 0x13080000 - -#endif diff --git a/arch/mips/include/asm/mach-jz4740/dma.h b/arch/mips/include/asm/mach-jz4740/dma.h deleted file mode 100644 index e5d2a5311a3a..000000000000 --- a/arch/mips/include/asm/mach-jz4740/dma.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ7420/JZ4740 DMA definitions - */ - -#ifndef __ASM_MACH_JZ4740_DMA_H__ -#define __ASM_MACH_JZ4740_DMA_H__ - -enum jz4740_dma_request_type { - JZ4740_DMA_TYPE_AUTO_REQUEST = 8, - JZ4740_DMA_TYPE_UART_TRANSMIT = 20, - JZ4740_DMA_TYPE_UART_RECEIVE = 21, - JZ4740_DMA_TYPE_SPI_TRANSMIT = 22, - JZ4740_DMA_TYPE_SPI_RECEIVE = 23, - JZ4740_DMA_TYPE_MMC_TRANSMIT = 26, - JZ4740_DMA_TYPE_MMC_RECEIVE = 27, - JZ4740_DMA_TYPE_TCU = 28, - JZ4740_DMA_TYPE_SADC = 29, - JZ4740_DMA_TYPE_SLCD = 30, -}; - -#endif /* __ASM_JZ4740_DMA_H__ */ diff --git a/arch/mips/include/asm/mach-jz4740/irq.h b/arch/mips/include/asm/mach-jz4740/irq.h index 09c38eac671a..27c543bd340f 100644 
--- a/arch/mips/include/asm/mach-jz4740/irq.h +++ b/arch/mips/include/asm/mach-jz4740/irq.h @@ -8,49 +8,6 @@ #define __ASM_MACH_JZ4740_IRQ_H__ #define MIPS_CPU_IRQ_BASE 0 -#define JZ4740_IRQ_BASE 8 - -#ifdef CONFIG_MACH_JZ4740 -# define NR_INTC_IRQS 32 -#else -# define NR_INTC_IRQS 64 -#endif - -/* 1st-level interrupts */ -#define JZ4740_IRQ(x) (JZ4740_IRQ_BASE + (x)) -#define JZ4740_IRQ_I2C JZ4740_IRQ(1) -#define JZ4740_IRQ_UHC JZ4740_IRQ(3) -#define JZ4740_IRQ_UART1 JZ4740_IRQ(8) -#define JZ4740_IRQ_UART0 JZ4740_IRQ(9) -#define JZ4740_IRQ_SADC JZ4740_IRQ(12) -#define JZ4740_IRQ_MSC JZ4740_IRQ(14) -#define JZ4740_IRQ_RTC JZ4740_IRQ(15) -#define JZ4740_IRQ_SSI JZ4740_IRQ(16) -#define JZ4740_IRQ_CIM JZ4740_IRQ(17) -#define JZ4740_IRQ_AIC JZ4740_IRQ(18) -#define JZ4740_IRQ_ETH JZ4740_IRQ(19) -#define JZ4740_IRQ_DMAC JZ4740_IRQ(20) -#define JZ4740_IRQ_TCU2 JZ4740_IRQ(21) -#define JZ4740_IRQ_TCU1 JZ4740_IRQ(22) -#define JZ4740_IRQ_TCU0 JZ4740_IRQ(23) -#define JZ4740_IRQ_UDC JZ4740_IRQ(24) -#define JZ4740_IRQ_GPIO3 JZ4740_IRQ(25) -#define JZ4740_IRQ_GPIO2 JZ4740_IRQ(26) -#define JZ4740_IRQ_GPIO1 JZ4740_IRQ(27) -#define JZ4740_IRQ_GPIO0 JZ4740_IRQ(28) -#define JZ4740_IRQ_IPU JZ4740_IRQ(29) -#define JZ4740_IRQ_LCD JZ4740_IRQ(30) - -#define JZ4780_IRQ_TCU2 JZ4740_IRQ(25) - -/* 2nd-level interrupts */ -#define JZ4740_IRQ_DMA(x) (JZ4740_IRQ(NR_INTC_IRQS) + (x)) - -#define JZ4740_IRQ_INTC_GPIO(x) (JZ4740_IRQ_GPIO0 - (x)) -#define JZ4740_IRQ_GPIO(x) (JZ4740_IRQ(NR_INTC_IRQS + 16) + (x)) - -#define JZ4740_IRQ_ADC_BASE JZ4740_IRQ(NR_INTC_IRQS + 144) - -#define NR_IRQS (JZ4740_IRQ_ADC_BASE + 6) +#define NR_IRQS 256 #endif diff --git a/arch/mips/include/asm/mach-jz4740/timer.h b/arch/mips/include/asm/mach-jz4740/timer.h deleted file mode 100644 index 8a19cfe5bed7..000000000000 --- a/arch/mips/include/asm/mach-jz4740/timer.h +++ /dev/null @@ -1,126 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ4740 platform timer support - */ - -#ifndef __ASM_MACH_JZ4740_TIMER -#define __ASM_MACH_JZ4740_TIMER - -#define JZ_REG_TIMER_STOP 0x0C -#define JZ_REG_TIMER_STOP_SET 0x1C -#define JZ_REG_TIMER_STOP_CLEAR 0x2C -#define JZ_REG_TIMER_ENABLE 0x00 -#define JZ_REG_TIMER_ENABLE_SET 0x04 -#define JZ_REG_TIMER_ENABLE_CLEAR 0x08 -#define JZ_REG_TIMER_FLAG 0x10 -#define JZ_REG_TIMER_FLAG_SET 0x14 -#define JZ_REG_TIMER_FLAG_CLEAR 0x18 -#define JZ_REG_TIMER_MASK 0x20 -#define JZ_REG_TIMER_MASK_SET 0x24 -#define JZ_REG_TIMER_MASK_CLEAR 0x28 - -#define JZ_REG_TIMER_DFR(x) (((x) * 0x10) + 0x30) -#define JZ_REG_TIMER_DHR(x) (((x) * 0x10) + 0x34) -#define JZ_REG_TIMER_CNT(x) (((x) * 0x10) + 0x38) -#define JZ_REG_TIMER_CTRL(x) (((x) * 0x10) + 0x3C) - -#define JZ_TIMER_IRQ_HALF(x) BIT((x) + 0x10) -#define JZ_TIMER_IRQ_FULL(x) BIT(x) - -#define JZ_TIMER_CTRL_PWM_ABBRUPT_SHUTDOWN BIT(9) -#define JZ_TIMER_CTRL_PWM_ACTIVE_LOW BIT(8) -#define JZ_TIMER_CTRL_PWM_ENABLE BIT(7) -#define JZ_TIMER_CTRL_PRESCALE_MASK 0x1c -#define JZ_TIMER_CTRL_PRESCALE_OFFSET 0x3 -#define JZ_TIMER_CTRL_PRESCALE_1 (0 << 3) -#define JZ_TIMER_CTRL_PRESCALE_4 (1 << 3) -#define JZ_TIMER_CTRL_PRESCALE_16 (2 << 3) -#define JZ_TIMER_CTRL_PRESCALE_64 (3 << 3) -#define JZ_TIMER_CTRL_PRESCALE_256 (4 << 3) -#define JZ_TIMER_CTRL_PRESCALE_1024 (5 << 3) - -#define JZ_TIMER_CTRL_PRESCALER(x) ((x) << JZ_TIMER_CTRL_PRESCALE_OFFSET) - -#define JZ_TIMER_CTRL_SRC_EXT BIT(2) -#define JZ_TIMER_CTRL_SRC_RTC BIT(1) -#define JZ_TIMER_CTRL_SRC_PCLK BIT(0) - -extern void __iomem *jz4740_timer_base; -void 
__init jz4740_timer_init(void); - -void jz4740_timer_enable_watchdog(void); -void jz4740_timer_disable_watchdog(void); - -static inline void jz4740_timer_stop(unsigned int timer) -{ - writel(BIT(timer), jz4740_timer_base + JZ_REG_TIMER_STOP_SET); -} - -static inline void jz4740_timer_start(unsigned int timer) -{ - writel(BIT(timer), jz4740_timer_base + JZ_REG_TIMER_STOP_CLEAR); -} - -static inline bool jz4740_timer_is_enabled(unsigned int timer) -{ - return readb(jz4740_timer_base + JZ_REG_TIMER_ENABLE) & BIT(timer); -} - -static inline void jz4740_timer_enable(unsigned int timer) -{ - writeb(BIT(timer), jz4740_timer_base + JZ_REG_TIMER_ENABLE_SET); -} - -static inline void jz4740_timer_disable(unsigned int timer) -{ - writeb(BIT(timer), jz4740_timer_base + JZ_REG_TIMER_ENABLE_CLEAR); -} - -static inline void jz4740_timer_set_period(unsigned int timer, uint16_t period) -{ - writew(period, jz4740_timer_base + JZ_REG_TIMER_DFR(timer)); -} - -static inline void jz4740_timer_set_duty(unsigned int timer, uint16_t duty) -{ - writew(duty, jz4740_timer_base + JZ_REG_TIMER_DHR(timer)); -} - -static inline void jz4740_timer_set_count(unsigned int timer, uint16_t count) -{ - writew(count, jz4740_timer_base + JZ_REG_TIMER_CNT(timer)); -} - -static inline uint16_t jz4740_timer_get_count(unsigned int timer) -{ - return readw(jz4740_timer_base + JZ_REG_TIMER_CNT(timer)); -} - -static inline void jz4740_timer_ack_full(unsigned int timer) -{ - writel(JZ_TIMER_IRQ_FULL(timer), jz4740_timer_base + JZ_REG_TIMER_FLAG_CLEAR); -} - -static inline void jz4740_timer_irq_full_enable(unsigned int timer) -{ - writel(JZ_TIMER_IRQ_FULL(timer), jz4740_timer_base + JZ_REG_TIMER_FLAG_CLEAR); - writel(JZ_TIMER_IRQ_FULL(timer), jz4740_timer_base + JZ_REG_TIMER_MASK_CLEAR); -} - -static inline void jz4740_timer_irq_full_disable(unsigned int timer) -{ - writel(JZ_TIMER_IRQ_FULL(timer), jz4740_timer_base + JZ_REG_TIMER_MASK_SET); -} - -static inline void jz4740_timer_set_ctrl(unsigned int timer, uint16_t ctrl) -{ - writew(ctrl, jz4740_timer_base + JZ_REG_TIMER_CTRL(timer)); -} - -static inline uint16_t jz4740_timer_get_ctrl(unsigned int timer) -{ - return readw(jz4740_timer_base + JZ_REG_TIMER_CTRL(timer)); -} - -#endif diff --git a/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h index f03c1c42dd90..10226976f7b7 100644 --- a/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h @@ -46,7 +46,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 diff --git a/arch/mips/include/asm/mach-lasat/irq.h b/arch/mips/include/asm/mach-lasat/irq.h deleted file mode 100644 index e8994921779e..000000000000 --- a/arch/mips/include/asm/mach-lasat/irq.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_MACH_LASAT_IRQ_H -#define _ASM_MACH_LASAT_IRQ_H - -#define LASAT_CASCADE_IRQ (MIPS_CPU_IRQ_BASE + 2) - -#define LASAT_IRQ_BASE 8 -#define LASAT_IRQ_END 23 - -#define NR_IRQS 24 - -#include <asm/mach-generic/irq.h> - -#endif /* _ASM_MACH_LASAT_IRQ_H */ diff --git a/arch/mips/include/asm/mach-lasat/mach-gt64120.h b/arch/mips/include/asm/mach-lasat/mach-gt64120.h deleted file mode 100644 index 6666a8871a23..000000000000 --- a/arch/mips/include/asm/mach-lasat/mach-gt64120.h +++ /dev/null @@ -1,28 +0,0 @@ -/* 
SPDX-License-Identifier: GPL-2.0 */ -/* - * This is a direct copy of the ev96100.h file, with a global - * search and replace. The numbers are the same. - * - * The reason I'm duplicating this is so that the 64120/96100 - * defines won't be confusing in the source code. - */ -#ifndef _ASM_GT64120_LASAT_GT64120_DEP_H -#define _ASM_GT64120_LASAT_GT64120_DEP_H - -/* - * GT64120 config space base address on Lasat 100 - */ -#define GT64120_BASE (KSEG1ADDR(0x14000000)) - -/* - * PCI Bus allocation - * - * (Guessing ...) - */ -#define GT_PCI_MEM_BASE 0x12000000UL -#define GT_PCI_MEM_SIZE 0x02000000UL -#define GT_PCI_IO_BASE 0x10000000UL -#define GT_PCI_IO_SIZE 0x02000000UL -#define GT_ISA_IO_BASE PCI_IO_BASE - -#endif /* _ASM_GT64120_LASAT_GT64120_DEP_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson.h b/arch/mips/include/asm/mach-loongson2ef/loongson.h index 5008af0a1a19..57e571128489 100644 --- a/arch/mips/include/asm/mach-loongson2ef/loongson.h +++ b/arch/mips/include/asm/mach-loongson2ef/loongson.h @@ -244,6 +244,7 @@ static inline void do_perfcnt_IRQ(void) #ifdef CONFIG_CPU_SUPPORTS_CPUFREQ #include <linux/cpufreq.h> extern struct cpufreq_frequency_table loongson2_clockmod_table[]; +extern int loongson2_cpu_set_rate(unsigned long rate_khz); #endif /* diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index 2ed483e32d8c..b35be709f9da 100644 --- a/arch/mips/include/asm/mach-loongson64/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -192,6 +192,11 @@ struct boot_params { struct efi_reset_system_t reset_system; }; +enum loongson_bridge_type { + LS7A = 1, + RS780E = 2 +}; + struct loongson_system_configuration { u32 nr_cpus; u32 nr_nodes; @@ -200,6 +205,7 @@ struct loongson_system_configuration { u16 boot_cpu_id; u16 reserved_cpus_mask; enum loongson_cpu_type cputype; + enum loongson_bridge_type bridgetype; u64 ht_control_base; u64 pci_mem_start_addr; u64 pci_mem_end_addr; @@ -215,9 +221,14 @@ struct loongson_system_configuration { u32 nr_sensors; struct sensor_device sensors[MAX_SENSORS]; u64 workarounds; + void (*early_config)(void); }; extern struct efi_memory_map_loongson *loongson_memmap; extern struct loongson_system_configuration loongson_sysconf; +extern u32 node_id_offset; +extern void ls7a_early_config(void); +extern void rs780e_early_config(void); + #endif diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h index 4fab38c743dd..b6e9c99b85a5 100644 --- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h @@ -48,5 +48,6 @@ #define cpu_hwrena_impl_bits 0xc0000000 #define cpu_has_mac2008_only 1 #define cpu_has_mips_r2_exec_hazard 0 +#define cpu_has_perf_cntr_intr_bit 0 #endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-loongson64/cpucfg-emul.h b/arch/mips/include/asm/mach-loongson64/cpucfg-emul.h new file mode 100644 index 000000000000..d64af19c210d --- /dev/null +++ b/arch/mips/include/asm/mach-loongson64/cpucfg-emul.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_MACH_LOONGSON64_CPUCFG_EMUL_H_ +#define _ASM_MACH_LOONGSON64_CPUCFG_EMUL_H_ + +#include <asm/cpu-info.h> + +#ifdef CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION + +#include <loongson_regs.h> + +#define LOONGSON_FPREV_MASK 0x7 + +void loongson3_cpucfg_synthesize_data(struct cpuinfo_mips *c); + 
+static inline bool loongson3_cpucfg_emulation_enabled(struct cpuinfo_mips *c) +{ + /* All supported cores have non-zero LOONGSON_CFG1 data. */ + return c->loongson3_cpucfg_data[0] != 0; +} + +static inline u32 loongson3_cpucfg_read_synthesized(struct cpuinfo_mips *c, + __u64 sel) +{ + switch (sel) { + case LOONGSON_CFG0: + return c->processor_id; + case LOONGSON_CFG1: + case LOONGSON_CFG2: + case LOONGSON_CFG3: + return c->loongson3_cpucfg_data[sel - 1]; + case LOONGSON_CFG4: + case LOONGSON_CFG5: + /* CPUCFG selects 4 and 5 are related to the input clock + * signal. + * + * Unimplemented for now. + */ + return 0; + case LOONGSON_CFG6: + /* CPUCFG select 6 is for the undocumented Safe Extension. */ + return 0; + case LOONGSON_CFG7: + /* CPUCFG select 7 is for the virtualization extension. + * We don't know if the two currently known features are + * supported on older cores according to the public + * documentation, so leave this at zero. + */ + return 0; + } + + /* + * Return 0 for unrecognized CPUCFG selects, which is real hardware + * behavior observed on Loongson 3A R4. + */ + return 0; +} +#else +static inline void loongson3_cpucfg_synthesize_data(struct cpuinfo_mips *c) +{ +} + +static inline bool loongson3_cpucfg_emulation_enabled(struct cpuinfo_mips *c) +{ + return false; +} + +static inline u32 loongson3_cpucfg_read_synthesized(struct cpuinfo_mips *c, + __u64 sel) +{ + return 0; +} +#endif + +#endif /* _ASM_MACH_LOONGSON64_CPUCFG_EMUL_H_ */ diff --git a/arch/mips/include/asm/mach-loongson64/loongson_regs.h b/arch/mips/include/asm/mach-loongson64/loongson_regs.h index 363a47a5d26e..83dbb9fdf9c2 100644 --- a/arch/mips/include/asm/mach-loongson64/loongson_regs.h +++ b/arch/mips/include/asm/mach-loongson64/loongson_regs.h @@ -67,6 +67,8 @@ static inline u32 read_cpucfg(u32 reg) #define LOONGSON_CFG1_SFBP BIT(29) #define LOONGSON_CFG1_CDMAP BIT(30) +#define LOONGSON_CFG1_FPREV_OFFSET 1 + #define LOONGSON_CFG2 0x2 #define LOONGSON_CFG2_LEXT1 BIT(0) #define LOONGSON_CFG2_LEXT2 BIT(1) @@ -77,12 +79,12 @@ static inline u32 read_cpucfg(u32 reg) #define LOONGSON_CFG2_LBT3 BIT(6) #define LOONGSON_CFG2_LBTMMU BIT(7) #define LOONGSON_CFG2_LPMP BIT(8) -#define LOONGSON_CFG2_LPMPREV GENMASK(11, 9) +#define LOONGSON_CFG2_LPMREV GENMASK(11, 9) #define LOONGSON_CFG2_LAMO BIT(12) #define LOONGSON_CFG2_LPIXU BIT(13) -#define LOONGSON_CFG2_LPIXUN BIT(14) -#define LOONGSON_CFG2_LZVP BIT(15) -#define LOONGSON_CFG2_LZVREV GENMASK(18, 16) +#define LOONGSON_CFG2_LPIXNU BIT(14) +#define LOONGSON_CFG2_LVZP BIT(15) +#define LOONGSON_CFG2_LVZREV GENMASK(18, 16) #define LOONGSON_CFG2_LGFTP BIT(19) #define LOONGSON_CFG2_LGFTPREV GENMASK(22, 20) #define LOONGSON_CFG2_LLFTP BIT(23) @@ -90,6 +92,13 @@ static inline u32 read_cpucfg(u32 reg) #define LOONGSON_CFG2_LCSRP BIT(27) #define LOONGSON_CFG2_LDISBLIKELY BIT(28) +#define LOONGSON_CFG2_LPMREV_OFFSET 9 +#define LOONGSON_CFG2_LPM_REV1 (1 << LOONGSON_CFG2_LPMREV_OFFSET) +#define LOONGSON_CFG2_LPM_REV2 (2 << LOONGSON_CFG2_LPMREV_OFFSET) +#define LOONGSON_CFG2_LVZREV_OFFSET 16 +#define LOONGSON_CFG2_LVZ_REV1 (1 << LOONGSON_CFG2_LVZREV_OFFSET) +#define LOONGSON_CFG2_LVZ_REV2 (2 << LOONGSON_CFG2_LVZREV_OFFSET) + #define LOONGSON_CFG3 0x3 #define LOONGSON_CFG3_LCAMP BIT(0) #define LOONGSON_CFG3_LCAMREV GENMASK(3, 1) @@ -97,6 +106,16 @@ static inline u32 read_cpucfg(u32 reg) #define LOONGSON_CFG3_LCAMKW GENMASK(19, 12) #define LOONGSON_CFG3_LCAMVW GENMASK(27, 20) +#define LOONGSON_CFG3_LCAMREV_OFFSET 1 +#define LOONGSON_CFG3_LCAM_REV1 (1 << LOONGSON_CFG3_LCAMREV_OFFSET) 
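/*
 * A sketch of how a trap-time emulator might consume the CPUCFG helpers
 * added in cpucfg-emul.h above; the register indices rd/rs and the
 * pt_regs context are assumptions for illustration, not from the patch.
 */
if (loongson3_cpucfg_emulation_enabled(&current_cpu_data))
	regs->regs[rd] = loongson3_cpucfg_read_synthesized(&current_cpu_data,
							   regs->regs[rs]);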
+#define LOONGSON_CFG3_LCAM_REV2 (2 << LOONGSON_CFG3_LCAMREV_OFFSET) +#define LOONGSON_CFG3_LCAMNUM_OFFSET 4 +#define LOONGSON_CFG3_LCAMNUM_REV1 (0x3f << LOONGSON_CFG3_LCAMNUM_OFFSET) +#define LOONGSON_CFG3_LCAMKW_OFFSET 12 +#define LOONGSON_CFG3_LCAMKW_REV1 (0x27 << LOONGSON_CFG3_LCAMKW_OFFSET) +#define LOONGSON_CFG3_LCAMVW_OFFSET 20 +#define LOONGSON_CFG3_LCAMVW_REV1 (0x3f << LOONGSON_CFG3_LCAMVW_OFFSET) + #define LOONGSON_CFG4 0x4 #define LOONGSON_CFG4_CCFREQ GENMASK(31, 0) @@ -139,7 +158,7 @@ static inline u64 csr_readq(u32 reg) { u64 __res; - /* DWRCSR reg, val */ + /* DRDCSR reg, val */ __asm__ __volatile__( "parse_r __res,%0\n\t" "parse_r reg,%1\n\t" diff --git a/arch/mips/include/asm/mach-loongson64/mc146818rtc.h b/arch/mips/include/asm/mach-loongson64/mc146818rtc.h deleted file mode 100644 index ebdccfee50be..000000000000 --- a/arch/mips/include/asm/mach-loongson64/mc146818rtc.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998, 2001, 03, 07 by Ralf Baechle (ralf@linux-mips.org) - * - * RTC routines for PC style attached Dallas chip. - */ -#ifndef __ASM_MACH_LOONGSON64_MC146818RTC_H -#define __ASM_MACH_LOONGSON64_MC146818RTC_H - -#include <linux/io.h> - -#define RTC_PORT(x) (0x70 + (x)) -#define RTC_IRQ 8 - -static inline unsigned char CMOS_READ(unsigned long addr) -{ - outb_p(addr, RTC_PORT(0)); - return inb_p(RTC_PORT(1)); -} - -static inline void CMOS_WRITE(unsigned char data, unsigned long addr) -{ - outb_p(addr, RTC_PORT(0)); - outb_p(data, RTC_PORT(1)); -} - -#define RTC_ALWAYS_BCD 0 - -#ifndef mc146818_decode_year -#define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1970) -#endif - -#endif /* __ASM_MACH_LOONGSON64_MC146818RTC_H */ diff --git a/arch/mips/include/asm/mach-loongson64/spaces.h b/arch/mips/include/asm/mach-loongson64/spaces.h index e85bc1d9c4f2..3de0ac9d8829 100644 --- a/arch/mips/include/asm/mach-loongson64/spaces.h +++ b/arch/mips/include/asm/mach-loongson64/spaces.h @@ -6,5 +6,13 @@ #define CAC_BASE _AC(0x9800000000000000, UL) #endif /* CONFIG_64BIT */ +/* Skip 128k to trap NULL pointer dereferences */ +#define PCI_IOBASE _AC(0xc000000000000000 + SZ_128K, UL) +#define PCI_IOSIZE SZ_16M +#define MAP_BASE (PCI_IOBASE + PCI_IOSIZE) + +/* Reserved at the start of PCI_IOBASE for legacy drivers */ +#define MMIO_LOWER_RESERVED 0x10000 + #include <asm/mach-generic/spaces.h> #endif diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/cpu-feature-overrides.h b/arch/mips/include/asm/mach-pmcs-msp71xx/cpu-feature-overrides.h deleted file mode 100644 index 016fa9446ba9..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/cpu-feature-overrides.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2003, 04, 07 Ralf Baechle (ralf@linux-mips.org) - */ -#ifndef __ASM_MACH_MSP71XX_CPU_FEATURE_OVERRIDES_H -#define __ASM_MACH_MSP71XX_CPU_FEATURE_OVERRIDES_H - -#define cpu_has_mips16 1 -#define cpu_has_dsp 1 -/* #define cpu_has_dsp2 ??? 
- do runtime detection */ -#define cpu_has_mipsmt 1 -#define cpu_has_fpu 0 - -#define cpu_has_mips32r1 0 -#define cpu_has_mips32r2 1 -#define cpu_has_mips64r1 0 -#define cpu_has_mips64r2 0 - -#endif /* __ASM_MACH_MSP71XX_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_cic_int.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_cic_int.h deleted file mode 100644 index 50de6876e1c9..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_cic_int.h +++ /dev/null @@ -1,139 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Defines for the MSP interrupt controller. - * - * Copyright (C) 1999 MIPS Technologies, Inc. All rights reserved. - * Author: Carsten Langgaard, carstenl@mips.com - * - * ######################################################################## - * - * ######################################################################## - */ - -#ifndef _MSP_CIC_INT_H -#define _MSP_CIC_INT_H - -/* - * The PMC-Sierra CIC interrupts are all centrally managed by the - * CIC sub-system. - * We attempt to keep the interrupt numbers as consistent as possible - * across all of the MSP devices, but some differences will creep in ... - * The interrupts which are directly forwarded to the MIPS core interrupts - * are assigned interrupts in the range 0-7, interrupts cascaded through - * the CIC are assigned interrupts 8-39. The cascade occurs on C_IRQ4 - * (MSP_INT_CIC). Currently we don't really distinguish between VPE1 - * and VPE0 (or thread contexts for that matter). Will have to fix. - * The PER interrupts are assigned interrupts in the range 40-71. -*/ - - -/* - * IRQs directly forwarded to the CPU - */ -#define MSP_MIPS_INTBASE 0 -#define MSP_INT_SW0 0 /* IRQ for swint0, C_SW0 */ -#define MSP_INT_SW1 1 /* IRQ for swint1, C_SW1 */ -#define MSP_INT_MAC0 2 /* IRQ for MAC 0, C_IRQ0 */ -#define MSP_INT_MAC1 3 /* IRQ for MAC 1, C_IRQ1 */ -#define MSP_INT_USB 4 /* IRQ for USB, C_IRQ2 */ -#define MSP_INT_SAR 5 /* IRQ for ADSL2+ SAR, C_IRQ3 */ -#define MSP_INT_CIC 6 /* IRQ for CIC block, C_IRQ4 */ -#define MSP_INT_SEC 7 /* IRQ for Sec engine, C_IRQ5 */ - -/* - * IRQs cascaded on CPU interrupt 4 (CAUSE bit 12, C_IRQ4) - * These defines should be tied to the register definitions for the CIC - * interrupt routine. For now, just use hard-coded values. 
- */ -#define MSP_CIC_INTBASE (MSP_MIPS_INTBASE + 8) -#define MSP_INT_EXT0 (MSP_CIC_INTBASE + 0) - /* External interrupt 0 */ -#define MSP_INT_EXT1 (MSP_CIC_INTBASE + 1) - /* External interrupt 1 */ -#define MSP_INT_EXT2 (MSP_CIC_INTBASE + 2) - /* External interrupt 2 */ -#define MSP_INT_EXT3 (MSP_CIC_INTBASE + 3) - /* External interrupt 3 */ -#define MSP_INT_CPUIF (MSP_CIC_INTBASE + 4) - /* CPU interface interrupt */ -#define MSP_INT_EXT4 (MSP_CIC_INTBASE + 5) - /* External interrupt 4 */ -#define MSP_INT_CIC_USB (MSP_CIC_INTBASE + 6) - /* Cascaded IRQ for USB */ -#define MSP_INT_MBOX (MSP_CIC_INTBASE + 7) - /* Sec engine mailbox IRQ */ -#define MSP_INT_EXT5 (MSP_CIC_INTBASE + 8) - /* External interrupt 5 */ -#define MSP_INT_TDM (MSP_CIC_INTBASE + 9) - /* TDM interrupt */ -#define MSP_INT_CIC_MAC0 (MSP_CIC_INTBASE + 10) - /* Cascaded IRQ for MAC 0 */ -#define MSP_INT_CIC_MAC1 (MSP_CIC_INTBASE + 11) - /* Cascaded IRQ for MAC 1 */ -#define MSP_INT_CIC_SEC (MSP_CIC_INTBASE + 12) - /* Cascaded IRQ for sec engine */ -#define MSP_INT_PER (MSP_CIC_INTBASE + 13) - /* Peripheral interrupt */ -#define MSP_INT_TIMER0 (MSP_CIC_INTBASE + 14) - /* SLP timer 0 */ -#define MSP_INT_TIMER1 (MSP_CIC_INTBASE + 15) - /* SLP timer 1 */ -#define MSP_INT_TIMER2 (MSP_CIC_INTBASE + 16) - /* SLP timer 2 */ -#define MSP_INT_VPE0_TIMER (MSP_CIC_INTBASE + 17) - /* VPE0 MIPS timer */ -#define MSP_INT_BLKCP (MSP_CIC_INTBASE + 18) - /* Block Copy */ -#define MSP_INT_UART0 (MSP_CIC_INTBASE + 19) - /* UART 0 */ -#define MSP_INT_PCI (MSP_CIC_INTBASE + 20) - /* PCI subsystem */ -#define MSP_INT_EXT6 (MSP_CIC_INTBASE + 21) - /* External interrupt 5 */ -#define MSP_INT_PCI_MSI (MSP_CIC_INTBASE + 22) - /* PCI Message Signal */ -#define MSP_INT_CIC_SAR (MSP_CIC_INTBASE + 23) - /* Cascaded ADSL2+ SAR IRQ */ -#define MSP_INT_DSL (MSP_CIC_INTBASE + 24) - /* ADSL2+ IRQ */ -#define MSP_INT_CIC_ERR (MSP_CIC_INTBASE + 25) - /* SLP error condition */ -#define MSP_INT_VPE1_TIMER (MSP_CIC_INTBASE + 26) - /* VPE1 MIPS timer */ -#define MSP_INT_VPE0_PC (MSP_CIC_INTBASE + 27) - /* VPE0 Performance counter */ -#define MSP_INT_VPE1_PC (MSP_CIC_INTBASE + 28) - /* VPE1 Performance counter */ -#define MSP_INT_EXT7 (MSP_CIC_INTBASE + 29) - /* External interrupt 5 */ -#define MSP_INT_VPE0_SW (MSP_CIC_INTBASE + 30) - /* VPE0 Software interrupt */ -#define MSP_INT_VPE1_SW (MSP_CIC_INTBASE + 31) - /* VPE0 Software interrupt */ - -/* - * IRQs cascaded on CIC PER interrupt (MSP_INT_PER) - */ -#define MSP_PER_INTBASE (MSP_CIC_INTBASE + 32) -/* Reserved 0-1 */ -#define MSP_INT_UART1 (MSP_PER_INTBASE + 2) - /* UART 1 */ -/* Reserved 3-5 */ -#define MSP_INT_2WIRE (MSP_PER_INTBASE + 6) - /* 2-wire */ -#define MSP_INT_TM0 (MSP_PER_INTBASE + 7) - /* Peripheral timer block out 0 */ -#define MSP_INT_TM1 (MSP_PER_INTBASE + 8) - /* Peripheral timer block out 1 */ -/* Reserved 9 */ -#define MSP_INT_SPRX (MSP_PER_INTBASE + 10) - /* SPI RX complete */ -#define MSP_INT_SPTX (MSP_PER_INTBASE + 11) - /* SPI TX complete */ -#define MSP_INT_GPIO (MSP_PER_INTBASE + 12) - /* GPIO */ -#define MSP_INT_PER_ERR (MSP_PER_INTBASE + 13) - /* Peripheral error */ -/* Reserved 14-31 */ - -#endif /* !_MSP_CIC_INT_H */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_gpio_macros.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_gpio_macros.h deleted file mode 100644 index daacebb047c2..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_gpio_macros.h +++ /dev/null @@ -1,343 +0,0 @@ -/* - * - * Macros for external SMP-safe access to the PMC MSP71xx reference - * board 
GPIO pins - * - * Copyright 2010 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __MSP_GPIO_MACROS_H__ -#define __MSP_GPIO_MACROS_H__ - -#include <msp_regops.h> -#include <msp_regs.h> - -#ifdef CONFIG_PMC_MSP7120_GW -#define MSP_NUM_GPIOS 20 -#else -#define MSP_NUM_GPIOS 28 -#endif - -/* -- GPIO Enumerations -- */ -enum msp_gpio_data { - MSP_GPIO_LO = 0, - MSP_GPIO_HI = 1, - MSP_GPIO_NONE, /* Special - Means pin is out of range */ - MSP_GPIO_TOGGLE, /* Special - Sets pin to opposite */ -}; - -enum msp_gpio_mode { - MSP_GPIO_INPUT = 0x0, - /* MSP_GPIO_ INTERRUPT = 0x1, Not supported yet */ - MSP_GPIO_UART_INPUT = 0x2, /* Only GPIO 4 or 5 */ - MSP_GPIO_OUTPUT = 0x8, - MSP_GPIO_UART_OUTPUT = 0x9, /* Only GPIO 2 or 3 */ - MSP_GPIO_PERIF_TIMERA = 0x9, /* Only GPIO 0 or 1 */ - MSP_GPIO_PERIF_TIMERB = 0xa, /* Only GPIO 0 or 1 */ - MSP_GPIO_UNKNOWN = 0xb, /* No such GPIO or mode */ -}; - -/* -- Static Tables -- */ - -/* Maps pins to data register */ -static volatile u32 * const MSP_GPIO_DATA_REGISTER[] = { - /* GPIO 0 and 1 on the first register */ - GPIO_DATA1_REG, GPIO_DATA1_REG, - /* GPIO 2, 3, 4, and 5 on the second register */ - GPIO_DATA2_REG, GPIO_DATA2_REG, GPIO_DATA2_REG, GPIO_DATA2_REG, - /* GPIO 6, 7, 8, and 9 on the third register */ - GPIO_DATA3_REG, GPIO_DATA3_REG, GPIO_DATA3_REG, GPIO_DATA3_REG, - /* GPIO 10, 11, 12, 13, 14, and 15 on the fourth register */ - GPIO_DATA4_REG, GPIO_DATA4_REG, GPIO_DATA4_REG, GPIO_DATA4_REG, - GPIO_DATA4_REG, GPIO_DATA4_REG, - /* GPIO 16 - 23 on the first strange EXTENDED register */ - EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, - EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, - EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, - /* GPIO 24 - 27 on the second strange EXTENDED register */ - EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG, - EXTENDED_GPIO2_REG, -}; - -/* Maps pins to mode register */ -static volatile u32 * const MSP_GPIO_MODE_REGISTER[] = { - /* GPIO 0 and 1 on the first register */ - GPIO_CFG1_REG, GPIO_CFG1_REG, - /* GPIO 2, 3, 4, and 5 on the second register */ - GPIO_CFG2_REG, GPIO_CFG2_REG, GPIO_CFG2_REG, GPIO_CFG2_REG, - /* GPIO 6, 7, 8, and 9 on the third register */ - GPIO_CFG3_REG, GPIO_CFG3_REG, GPIO_CFG3_REG, GPIO_CFG3_REG, - /* GPIO 10, 11, 12, 13, 14, and 15 on the fourth register */ - GPIO_CFG4_REG, GPIO_CFG4_REG, GPIO_CFG4_REG, 
GPIO_CFG4_REG, - GPIO_CFG4_REG, GPIO_CFG4_REG, - /* GPIO 16 - 23 on the first strange EXTENDED register */ - EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, - EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, - EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, - /* GPIO 24 - 27 on the second strange EXTENDED register */ - EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG, - EXTENDED_GPIO2_REG, -}; - -/* Maps 'basic' pins to relative offset from 0 per register */ -static int MSP_GPIO_OFFSET[] = { - /* GPIO 0 and 1 on the first register */ - 0, 0, - /* GPIO 2, 3, 4, and 5 on the second register */ - 2, 2, 2, 2, - /* GPIO 6, 7, 8, and 9 on the third register */ - 6, 6, 6, 6, - /* GPIO 10, 11, 12, 13, 14, and 15 on the fourth register */ - 10, 10, 10, 10, 10, 10, -}; - -/* Maps MODE to allowed pin mask */ -static unsigned int MSP_GPIO_MODE_ALLOWED[] = { - 0xffffffff, /* Mode 0 - INPUT */ - 0x00000, /* Mode 1 - INTERRUPT */ - 0x00030, /* Mode 2 - UART_INPUT (GPIO 4, 5)*/ - 0, 0, 0, 0, 0, /* Modes 3, 4, 5, 6, and 7 are reserved */ - 0xffffffff, /* Mode 8 - OUTPUT */ - 0x0000f, /* Mode 9 - UART_OUTPUT/ - PERF_TIMERA (GPIO 0, 1, 2, 3) */ - 0x00003, /* Mode a - PERF_TIMERB (GPIO 0, 1) */ - 0x00000, /* Mode b - Not really a mode! */ -}; - -/* -- Bit masks -- */ - -/* This gives you the 'register relative offset gpio' number */ -#define OFFSET_GPIO_NUMBER(gpio) (gpio - MSP_GPIO_OFFSET[gpio]) - -/* These take the 'register relative offset gpio' number */ -#define BASIC_DATA_REG_MASK(ogpio) (1 << ogpio) -#define BASIC_MODE_REG_VALUE(mode, ogpio) \ - (mode << BASIC_MODE_REG_SHIFT(ogpio)) -#define BASIC_MODE_REG_MASK(ogpio) \ - BASIC_MODE_REG_VALUE(0xf, ogpio) -#define BASIC_MODE_REG_SHIFT(ogpio) (ogpio * 4) -#define BASIC_MODE_REG_FROM_REG(data, ogpio) \ - ((data & BASIC_MODE_REG_MASK(ogpio)) >> BASIC_MODE_REG_SHIFT(ogpio)) - -/* These take the actual GPIO number (0 through 15) */ -#define BASIC_DATA_MASK(gpio) \ - BASIC_DATA_REG_MASK(OFFSET_GPIO_NUMBER(gpio)) -#define BASIC_MODE_MASK(gpio) \ - BASIC_MODE_REG_MASK(OFFSET_GPIO_NUMBER(gpio)) -#define BASIC_MODE(mode, gpio) \ - BASIC_MODE_REG_VALUE(mode, OFFSET_GPIO_NUMBER(gpio)) -#define BASIC_MODE_SHIFT(gpio) \ - BASIC_MODE_REG_SHIFT(OFFSET_GPIO_NUMBER(gpio)) -#define BASIC_MODE_FROM_REG(data, gpio) \ - BASIC_MODE_REG_FROM_REG(data, OFFSET_GPIO_NUMBER(gpio)) - -/* - * Each extended GPIO register is 32 bits long and is responsible for up to - * eight GPIOs. The least significant 16 bits contain the set and clear bit - * pair for each of the GPIOs. The most significant 16 bits contain the - * disable and enable bit pair for each of the GPIOs. For example, the - * extended GPIO reg for GPIOs 16-23 is as follows: - * - * 31: GPIO23_DISABLE - * ... - * 19: GPIO17_DISABLE - * 18: GPIO17_ENABLE - * 17: GPIO16_DISABLE - * 16: GPIO16_ENABLE - * ... - * 3: GPIO17_SET - * 2: GPIO17_CLEAR - * 1: GPIO16_SET - * 0: GPIO16_CLEAR - */ - -/* This gives the 'register relative offset gpio' number */ -#define EXTENDED_OFFSET_GPIO(gpio) (gpio < 24 ? 
gpio - 16 : gpio - 24) - -/* These take the 'register relative offset gpio' number */ -#define EXTENDED_REG_DISABLE(ogpio) (0x2 << ((ogpio * 2) + 16)) -#define EXTENDED_REG_ENABLE(ogpio) (0x1 << ((ogpio * 2) + 16)) -#define EXTENDED_REG_SET(ogpio) (0x2 << (ogpio * 2)) -#define EXTENDED_REG_CLR(ogpio) (0x1 << (ogpio * 2)) - -/* These take the actual GPIO number (16 through 27) */ -#define EXTENDED_DISABLE(gpio) \ - EXTENDED_REG_DISABLE(EXTENDED_OFFSET_GPIO(gpio)) -#define EXTENDED_ENABLE(gpio) \ - EXTENDED_REG_ENABLE(EXTENDED_OFFSET_GPIO(gpio)) -#define EXTENDED_SET(gpio) \ - EXTENDED_REG_SET(EXTENDED_OFFSET_GPIO(gpio)) -#define EXTENDED_CLR(gpio) \ - EXTENDED_REG_CLR(EXTENDED_OFFSET_GPIO(gpio)) - -#define EXTENDED_FULL_MASK (0xffffffff) - -/* -- API inline-functions -- */ - -/* - * Gets the current value of the specified pin - */ -static inline enum msp_gpio_data msp_gpio_pin_get(unsigned int gpio) -{ - u32 pinhi_mask = 0, pinhi_mask2 = 0; - - if (gpio >= MSP_NUM_GPIOS) - return MSP_GPIO_NONE; - - if (gpio < 16) { - pinhi_mask = BASIC_DATA_MASK(gpio); - } else { - /* - * Two cases are possible with the EXTENDED register: - * - In output mode (ENABLED flag set), check the CLR bit - * - In input mode (ENABLED flag not set), check the SET bit - */ - pinhi_mask = EXTENDED_ENABLE(gpio) | EXTENDED_CLR(gpio); - pinhi_mask2 = EXTENDED_SET(gpio); - } - if (((*MSP_GPIO_DATA_REGISTER[gpio] & pinhi_mask) == pinhi_mask) || - (*MSP_GPIO_DATA_REGISTER[gpio] & pinhi_mask2)) - return MSP_GPIO_HI; - else - return MSP_GPIO_LO; -} - -/* Sets the specified pin to the specified value */ -static inline void msp_gpio_pin_set(enum msp_gpio_data data, unsigned int gpio) -{ - if (gpio >= MSP_NUM_GPIOS) - return; - - if (gpio < 16) { - if (data == MSP_GPIO_TOGGLE) - toggle_reg32(MSP_GPIO_DATA_REGISTER[gpio], - BASIC_DATA_MASK(gpio)); - else if (data == MSP_GPIO_HI) - set_reg32(MSP_GPIO_DATA_REGISTER[gpio], - BASIC_DATA_MASK(gpio)); - else - clear_reg32(MSP_GPIO_DATA_REGISTER[gpio], - BASIC_DATA_MASK(gpio)); - } else { - if (data == MSP_GPIO_TOGGLE) { - /* Special ugly case: - * We have to read the CLR bit. - * If set, we write the CLR bit. - * If not, we write the SET bit. 
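/*
 * Worked example of the extended-register encoding described in the
 * msp_gpio_macros.h comment above, assuming only the macros shown there:
 * for GPIO 17, EXTENDED_OFFSET_GPIO(17) == 1, so
 *   EXTENDED_SET(17)     == 0x2 << 2  == bit 3   (GPIO17_SET)
 *   EXTENDED_CLR(17)     == 0x1 << 2  == bit 2   (GPIO17_CLEAR)
 *   EXTENDED_ENABLE(17)  == 0x1 << 18 == bit 18  (GPIO17_ENABLE)
 *   EXTENDED_DISABLE(17) == 0x2 << 18 == bit 19  (GPIO17_DISABLE)
 * which matches the bit map listed in that comment.
 */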
- */ - u32 tmpdata; - - custom_read_reg32(MSP_GPIO_DATA_REGISTER[gpio], - tmpdata); - if (tmpdata & EXTENDED_CLR(gpio)) - tmpdata = EXTENDED_CLR(gpio); - else - tmpdata = EXTENDED_SET(gpio); - custom_write_reg32(MSP_GPIO_DATA_REGISTER[gpio], - tmpdata); - } else { - u32 newdata; - - if (data == MSP_GPIO_HI) - newdata = EXTENDED_SET(gpio); - else - newdata = EXTENDED_CLR(gpio); - set_value_reg32(MSP_GPIO_DATA_REGISTER[gpio], - EXTENDED_FULL_MASK, newdata); - } - } -} - -/* Sets the specified pin to the specified value */ -static inline void msp_gpio_pin_hi(unsigned int gpio) -{ - msp_gpio_pin_set(MSP_GPIO_HI, gpio); -} - -/* Sets the specified pin to the specified value */ -static inline void msp_gpio_pin_lo(unsigned int gpio) -{ - msp_gpio_pin_set(MSP_GPIO_LO, gpio); -} - -/* Sets the specified pin to the opposite value */ -static inline void msp_gpio_pin_toggle(unsigned int gpio) -{ - msp_gpio_pin_set(MSP_GPIO_TOGGLE, gpio); -} - -/* Gets the mode of the specified pin */ -static inline enum msp_gpio_mode msp_gpio_pin_get_mode(unsigned int gpio) -{ - enum msp_gpio_mode retval = MSP_GPIO_UNKNOWN; - uint32_t data; - - if (gpio >= MSP_NUM_GPIOS) - return retval; - - data = *MSP_GPIO_MODE_REGISTER[gpio]; - - if (gpio < 16) { - retval = BASIC_MODE_FROM_REG(data, gpio); - } else { - /* Extended pins can only be either INPUT or OUTPUT */ - if (data & EXTENDED_ENABLE(gpio)) - retval = MSP_GPIO_OUTPUT; - else - retval = MSP_GPIO_INPUT; - } - - return retval; -} - -/* - * Sets the specified mode on the requested pin - * Returns 0 on success, or -1 if that mode is not allowed on this pin - */ -static inline int msp_gpio_pin_mode(enum msp_gpio_mode mode, unsigned int gpio) -{ - u32 modemask, newmode; - - if ((1 << gpio) & ~MSP_GPIO_MODE_ALLOWED[mode]) - return -1; - - if (gpio >= MSP_NUM_GPIOS) - return -1; - - if (gpio < 16) { - modemask = BASIC_MODE_MASK(gpio); - newmode = BASIC_MODE(mode, gpio); - } else { - modemask = EXTENDED_FULL_MASK; - if (mode == MSP_GPIO_INPUT) - newmode = EXTENDED_DISABLE(gpio); - else - newmode = EXTENDED_ENABLE(gpio); - } - /* Do the set atomically */ - set_value_reg32(MSP_GPIO_MODE_REGISTER[gpio], modemask, newmode); - - return 0; -} - -#endif /* __MSP_GPIO_MACROS_H__ */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_int.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_int.h deleted file mode 100644 index 55078b40f5b5..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_int.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Defines for the MSP interrupt handlers. - * - * Copyright (C) 2005, PMC-Sierra, Inc. All rights reserved. - * Author: Andrew Hughes, Andrew_Hughes@pmc-sierra.com - * - * ######################################################################## - * - * ######################################################################## - */ - -#ifndef _MSP_INT_H -#define _MSP_INT_H - -/* - * The PMC-Sierra MSP product line has at least two different interrupt - * controllers, the SLP register based scheme and the CIC interrupt - * controller block mechanism. This file distinguishes between them - * so that devices see a uniform interface. - */ - -#if defined(CONFIG_IRQ_MSP_SLP) - #include "msp_slp_int.h" -#elif defined(CONFIG_IRQ_MSP_CIC) - #include "msp_cic_int.h" -#else - #error "What sort of interrupt controller does *your* MSP have?" 
-#endif - -#endif /* !_MSP_INT_H */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_pci.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_pci.h deleted file mode 100644 index 5b2535efceb2..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_pci.h +++ /dev/null @@ -1,189 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2000-2006 PMC-Sierra INC. - * - * PMC-SIERRA INC. DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS - * SOFTWARE. - */ - -#ifndef _MSP_PCI_H_ -#define _MSP_PCI_H_ - -#define MSP_HAS_PCI(ID) (((u32)(ID) <= 0x4236) && ((u32)(ID) >= 0x4220)) - -/* - * It is convenient to program the OATRAN register so that - * Athena virtual address space and PCI address space are - * the same. This is not a requirement, just a convenience. - * - * The only hard restrictions on the value of OATRAN is that - * OATRAN must not be programmed to allow translated memory - * addresses to fall within the lowest 512MB of - * PCI address space. This region is hardcoded - * for use as Athena PCI Host Controller target - * access memory space to the Athena's SDRAM. - * - * Note that OATRAN applies only to memory accesses, not - * to I/O accesses. - * - * To program OATRAN to make Athena virtual address space - * and PCI address space have the same values, OATRAN - * is to be programmed to 0xB8000000. The top seven - * bits of the value mimic the seven bits clipped off - * by the PCI Host controller. - * - * With OATRAN at the said value, when the CPU does - * an access to its virtual address at, say 0xB900_5000, - * the address appearing on the PCI bus will be - * 0xB900_5000. - * - Michael Penner - */ -#define MSP_PCI_OATRAN 0xB8000000UL - -#define MSP_PCI_SPACE_BASE (MSP_PCI_OATRAN + 0x1002000UL) -#define MSP_PCI_SPACE_SIZE (0x3000000UL - 0x2000) -#define MSP_PCI_SPACE_END \ - (MSP_PCI_SPACE_BASE + MSP_PCI_SPACE_SIZE - 1) -#define MSP_PCI_IOSPACE_BASE (MSP_PCI_OATRAN + 0x1001000UL) -#define MSP_PCI_IOSPACE_SIZE 0x1000 -#define MSP_PCI_IOSPACE_END \ - (MSP_PCI_IOSPACE_BASE + MSP_PCI_IOSPACE_SIZE - 1) - -/* IRQ for PCI status interrupts */ -#define PCI_STAT_IRQ 20 - -#define QFLUSH_REG_1 0xB7F40000 - -typedef volatile unsigned int pcireg; -typedef void * volatile ppcireg; - -struct pci_block_copy -{ - pcireg unused1; /* +0x00 */ - pcireg unused2; /* +0x04 */ - ppcireg unused3; /* +0x08 */ - ppcireg unused4; /* +0x0C */ - pcireg unused5; /* +0x10 */ - pcireg unused6; /* +0x14 */ - pcireg unused7; /* +0x18 */ - ppcireg unused8; /* +0x1C */ - ppcireg unused9; /* +0x20 */ - pcireg unusedA; /* +0x24 */ - ppcireg unusedB; /* +0x28 */ - ppcireg unusedC; /* +0x2C */ -}; - -enum -{ - config_device_vendor, /* 0 */ - config_status_command, /* 1 */ - config_class_revision, /* 2 */ - config_BIST_header_latency_cache, /* 3 */ - config_BAR0, /* 4 */ - config_BAR1, /* 5 */ - config_BAR2, /* 6 */ - config_not_used7, /* 7 */ - config_not_used8, /* 8 */ - config_not_used9, /* 9 */ - config_CIS, /* 10 */ - config_subsystem, /* 11 */ - config_not_used12, /* 12 */ - config_capabilities, /* 13 */ - config_not_used14, /* 14 */ - config_lat_grant_irq, /* 15 */ - config_message_control,/* 16 */ - config_message_addr, /* 17 */ - config_message_data, /* 18 */ - config_VPD_addr, /* 19 */ - config_VPD_data, /* 20 */ - config_maxregs /* 21 - number of registers */ -}; - -struct msp_pci_regs -{ - pcireg hop_unused_00; /* +0x00 */ - pcireg hop_unused_04; /* +0x04 */ - pcireg hop_unused_08; /* +0x08 */ - pcireg hop_unused_0C; /* +0x0C */ - 
pcireg hop_unused_10; /* +0x10 */ - pcireg hop_unused_14; /* +0x14 */ - pcireg hop_unused_18; /* +0x18 */ - pcireg hop_unused_1C; /* +0x1C */ - pcireg hop_unused_20; /* +0x20 */ - pcireg hop_unused_24; /* +0x24 */ - pcireg hop_unused_28; /* +0x28 */ - pcireg hop_unused_2C; /* +0x2C */ - pcireg hop_unused_30; /* +0x30 */ - pcireg hop_unused_34; /* +0x34 */ - pcireg if_control; /* +0x38 */ - pcireg oatran; /* +0x3C */ - pcireg reset_ctl; /* +0x40 */ - pcireg config_addr; /* +0x44 */ - pcireg hop_unused_48; /* +0x48 */ - pcireg msg_signaled_int_status; /* +0x4C */ - pcireg msg_signaled_int_mask; /* +0x50 */ - pcireg if_status; /* +0x54 */ - pcireg if_mask; /* +0x58 */ - pcireg hop_unused_5C; /* +0x5C */ - pcireg hop_unused_60; /* +0x60 */ - pcireg hop_unused_64; /* +0x64 */ - pcireg hop_unused_68; /* +0x68 */ - pcireg hop_unused_6C; /* +0x6C */ - pcireg hop_unused_70; /* +0x70 */ - - struct pci_block_copy pci_bc[2] __attribute__((aligned(64))); - - pcireg error_hdr1; /* +0xE0 */ - pcireg error_hdr2; /* +0xE4 */ - - pcireg config[config_maxregs] __attribute__((aligned(256))); - -}; - -#define BPCI_CFGADDR_BUSNUM_SHF 16 -#define BPCI_CFGADDR_FUNCTNUM_SHF 8 -#define BPCI_CFGADDR_REGNUM_SHF 2 -#define BPCI_CFGADDR_ENABLE (1<<31) - -#define BPCI_IFCONTROL_RTO (1<<20) /* Retry timeout */ -#define BPCI_IFCONTROL_HCE (1<<16) /* Host configuration enable */ -#define BPCI_IFCONTROL_CTO_SHF 12 /* Shift count for CTO bits */ -#define BPCI_IFCONTROL_SE (1<<5) /* Enable exceptions on errors */ -#define BPCI_IFCONTROL_BIST (1<<4) /* Use BIST in per. mode */ -#define BPCI_IFCONTROL_CAP (1<<3) /* Enable capabilities */ -#define BPCI_IFCONTROL_MMC_SHF 0 /* Shift count for MMC bits */ - -#define BPCI_IFSTATUS_MGT (1<<8) /* Master Grant timeout */ -#define BPCI_IFSTATUS_MTT (1<<9) /* Master TRDY timeout */ -#define BPCI_IFSTATUS_MRT (1<<10) /* Master retry timeout */ -#define BPCI_IFSTATUS_BC0F (1<<13) /* Block copy 0 fault */ -#define BPCI_IFSTATUS_BC1F (1<<14) /* Block copy 1 fault */ -#define BPCI_IFSTATUS_PCIU (1<<15) /* PCI unable to respond */ -#define BPCI_IFSTATUS_BSIZ (1<<16) /* PCI access with illegal size */ -#define BPCI_IFSTATUS_BADD (1<<17) /* PCI access with illegal addr */ -#define BPCI_IFSTATUS_RTO (1<<18) /* Retry time out */ -#define BPCI_IFSTATUS_SER (1<<19) /* System error */ -#define BPCI_IFSTATUS_PER (1<<20) /* Parity error */ -#define BPCI_IFSTATUS_LCA (1<<21) /* Local CPU abort */ -#define BPCI_IFSTATUS_MEM (1<<22) /* Memory prot. 
violation */ -#define BPCI_IFSTATUS_ARB (1<<23) /* Arbiter timed out */ -#define BPCI_IFSTATUS_STA (1<<27) /* Signaled target abort */ -#define BPCI_IFSTATUS_TA (1<<28) /* Target abort */ -#define BPCI_IFSTATUS_MA (1<<29) /* Master abort */ -#define BPCI_IFSTATUS_PEI (1<<30) /* Parity error as initiator */ -#define BPCI_IFSTATUS_PET (1<<31) /* Parity error as target */ - -#define BPCI_RESETCTL_PR (1<<0) /* True if reset asserted */ -#define BPCI_RESETCTL_RT (1<<4) /* Release time */ -#define BPCI_RESETCTL_CT (1<<8) /* Config time */ -#define BPCI_RESETCTL_PE (1<<12) /* PCI enabled */ -#define BPCI_RESETCTL_HM (1<<13) /* PCI host mode */ -#define BPCI_RESETCTL_RI (1<<14) /* PCI reset in */ - -extern struct msp_pci_regs msp_pci_regs - __attribute__((section(".register"))); -extern unsigned long msp_pci_config_space - __attribute__((section(".register"))); - -#endif /* !_MSP_PCI_H_ */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_prom.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_prom.h deleted file mode 100644 index 4120a01c30a9..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_prom.h +++ /dev/null @@ -1,159 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * MIPS boards bootprom interface for the Linux kernel. - * - * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. - * Author: Carsten Langgaard, carstenl@mips.com - * - * ######################################################################## - * - * ######################################################################## - */ - -#ifndef _ASM_MSP_PROM_H -#define _ASM_MSP_PROM_H - -#include <linux/types.h> - -#define DEVICEID "deviceid" -#define FEATURES "features" -#define PROM_ENV "prom_env" -#define PROM_ENV_FILE "/proc/"PROM_ENV -#define PROM_ENV_SIZE 256 - -#define CPU_DEVID_FAMILY 0x0000ff00 -#define CPU_DEVID_REVISION 0x000000ff - -#define FPGA_IS_POLO(revision) \ - (((revision >= 0xb0) && (revision < 0xd0))) -#define FPGA_IS_5000(revision) \ - ((revision >= 0x80) && (revision <= 0x90)) -#define FPGA_IS_ZEUS(revision) ((revision < 0x7f)) -#define FPGA_IS_DUET(revision) \ - (((revision >= 0xa0) && (revision < 0xb0))) -#define FPGA_IS_MSP4200(revision) ((revision >= 0xd0)) -#define FPGA_IS_MSP7100(revision) ((revision >= 0xd0)) - -#define MACHINE_TYPE_POLO "POLO" -#define MACHINE_TYPE_DUET "DUET" -#define MACHINE_TYPE_ZEUS "ZEUS" -#define MACHINE_TYPE_MSP2000REVB "MSP2000REVB" -#define MACHINE_TYPE_MSP5000 "MSP5000" -#define MACHINE_TYPE_MSP4200 "MSP4200" -#define MACHINE_TYPE_MSP7120 "MSP7120" -#define MACHINE_TYPE_MSP7130 "MSP7130" -#define MACHINE_TYPE_OTHER "OTHER" - -#define MACHINE_TYPE_POLO_FPGA "POLO-FPGA" -#define MACHINE_TYPE_DUET_FPGA "DUET-FPGA" -#define MACHINE_TYPE_ZEUS_FPGA "ZEUS_FPGA" -#define MACHINE_TYPE_MSP2000REVB_FPGA "MSP2000REVB-FPGA" -#define MACHINE_TYPE_MSP5000_FPGA "MSP5000-FPGA" -#define MACHINE_TYPE_MSP4200_FPGA "MSP4200-FPGA" -#define MACHINE_TYPE_MSP7100_FPGA "MSP7100-FPGA" -#define MACHINE_TYPE_OTHER_FPGA "OTHER-FPGA" - -/* Device Family definitions */ -#define FAMILY_FPGA 0x0000 -#define FAMILY_ZEUS 0x1000 -#define FAMILY_POLO 0x2000 -#define FAMILY_DUET 0x4000 -#define FAMILY_TRIAD 0x5000 -#define FAMILY_MSP4200 0x4200 -#define FAMILY_MSP4200_FPGA 0x4f00 -#define FAMILY_MSP7100 0x7100 -#define FAMILY_MSP7100_FPGA 0x7f00 - -/* Device Type definitions */ -#define TYPE_MSP7120 0x7120 -#define TYPE_MSP7130 0x7130 - -#define ENET_KEY 'E' -#define ENETTXD_KEY 'e' -#define PCI_KEY 'P' -#define PCIMUX_KEY 'p' -#define SEC_KEY 'S' -#define SPAD_KEY 'D' -#define 
TDM_KEY 'T' -#define ZSP_KEY 'Z' - -#define FEATURE_NOEXIST '-' -#define FEATURE_EXIST '+' - -#define ENET_MII 'M' -#define ENET_RMII 'R' - -#define ENETTXD_FALLING 'F' -#define ENETTXD_RISING 'R' - -#define PCI_HOST 'H' -#define PCI_PERIPHERAL 'P' - -#define PCIMUX_FULL 'F' -#define PCIMUX_SINGLE 'S' - -#define SEC_DUET 'D' -#define SEC_POLO 'P' -#define SEC_SLOW 'S' -#define SEC_TRIAD 'T' - -#define SPAD_POLO 'P' - -#define TDM_DUET 'D' /* DUET TDMs might exist */ -#define TDM_POLO 'P' /* POLO TDMs might exist */ -#define TDM_TRIAD 'T' /* TRIAD TDMs might exist */ - -#define ZSP_DUET 'D' /* one DUET zsp engine */ -#define ZSP_TRIAD 'T' /* two TRIAD zsp engines */ - -extern char *prom_getenv(char *name); -extern void prom_init_cmdline(void); -extern void prom_meminit(void); -extern void prom_fixup_mem_map(unsigned long start_mem, - unsigned long end_mem); - -extern int get_ethernet_addr(char *ethaddr_name, char *ethernet_addr); -extern unsigned long get_deviceid(void); -extern char identify_enet(unsigned long interface_num); -extern char identify_enetTxD(unsigned long interface_num); -extern char identify_pci(void); -extern char identify_sec(void); -extern char identify_spad(void); -extern char identify_sec(void); -extern char identify_tdm(void); -extern char identify_zsp(void); -extern unsigned long identify_family(void); -extern unsigned long identify_revision(void); - -/* - * The following macro calls prom_printf and puts the format string - * into an init section so it can be reclaimed. - */ -#define ppfinit(f, x...) \ - do { \ - static char _f[] __initdata = KERN_INFO f; \ - printk(_f, ## x); \ - } while (0) - -/* Memory descriptor management. */ -#define PROM_MAX_PMEMBLOCKS 7 /* 6 used */ - -enum yamon_memtypes { - yamon_dontuse, - yamon_prom, - yamon_free, -}; - -struct prom_pmemblock { - unsigned long base; /* Within KSEG0. */ - unsigned int size; /* In bytes. */ - unsigned int type; /* free or prom memory */ -}; - -extern int prom_argc; -extern char **prom_argv; -extern char **prom_envp; -extern int *prom_vec; -extern struct prom_pmemblock *prom_getmdesc(void); - -#endif /* !_ASM_MSP_PROM_H */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h deleted file mode 100644 index 90dbe43c8d27..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h +++ /dev/null @@ -1,237 +0,0 @@ -/* - * SMP/VPE-safe functions to access "registers" (see note). - * - * NOTES: -* - These macros use ll/sc instructions, so it is your responsibility to - * ensure these are available on your platform before including this file. - * - The MIPS32 spec states that ll/sc results are undefined for uncached - * accesses. This means they can't be used on HW registers accessed - * through kseg1. Code which requires these macros for this purpose must - * front-end the registers with cached memory "registers" and have a single - * thread update the actual HW registers. - * - A maximum of 2k of code can be inserted between ll and sc. Every - * memory accesses between the instructions will increase the chance of - * sc failing and having to loop. - * - When using custom_read_reg32/custom_write_reg32 only perform the - * necessary logical operations on the register value in between these - * two calls. All other logic should be performed before the first call. - * - There is a bug on the R10000 chips which has a workaround. If you - * are affected by this bug, make sure to define the symbol 'R10000_LLSC_WAR' - * to be non-zero. 
If you are using this header from within linux, you may - * include <asm/war.h> before including this file to have this defined - * appropriately for you. - * - * Copyright 2005-2007 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., 675 - * Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __ASM_REGOPS_H__ -#define __ASM_REGOPS_H__ - -#include <linux/types.h> - -#include <asm/compiler.h> -#include <asm/war.h> - -#ifndef R10000_LLSC_WAR -#define R10000_LLSC_WAR 0 -#endif - -#if R10000_LLSC_WAR == 1 -#define __beqz "beqzl " -#else -#define __beqz "beqz " -#endif - -#ifndef _LINUX_TYPES_H -typedef unsigned int u32; -#endif - -/* - * Sets all the masked bits to the corresponding value bits - */ -static inline void set_value_reg32(volatile u32 *const addr, - u32 const mask, - u32 const value) -{ - u32 temp; - - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: ll %0, %1 # set_value_reg32 \n" - " and %0, %2 \n" - " or %0, %3 \n" - " sc %0, %1 \n" - " "__beqz"%0, 1b \n" - " nop \n" - " .set pop \n" - : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr) - : "ir" (~mask), "ir" (value), GCC_OFF_SMALL_ASM() (*addr)); -} - -/* - * Sets all the masked bits to '1' - */ -static inline void set_reg32(volatile u32 *const addr, - u32 const mask) -{ - u32 temp; - - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: ll %0, %1 # set_reg32 \n" - " or %0, %2 \n" - " sc %0, %1 \n" - " "__beqz"%0, 1b \n" - " nop \n" - " .set pop \n" - : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr) - : "ir" (mask), GCC_OFF_SMALL_ASM() (*addr)); -} - -/* - * Sets all the masked bits to '0' - */ -static inline void clear_reg32(volatile u32 *const addr, - u32 const mask) -{ - u32 temp; - - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: ll %0, %1 # clear_reg32 \n" - " and %0, %2 \n" - " sc %0, %1 \n" - " "__beqz"%0, 1b \n" - " nop \n" - " .set pop \n" - : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr) - : "ir" (~mask), GCC_OFF_SMALL_ASM() (*addr)); -} - -/* - * Toggles all masked bits from '0' to '1' and '1' to '0' - */ -static inline void toggle_reg32(volatile u32 *const addr, - u32 const mask) -{ - u32 temp; - - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: ll %0, %1 # toggle_reg32 \n" - " xor %0, %2 \n" - " sc %0, %1 \n" - " "__beqz"%0, 1b \n" - " nop \n" - " .set pop \n" - : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr) - : "ir" (mask), 
GCC_OFF_SMALL_ASM() (*addr)); -} - -/* - * Read all masked bits others are returned as '0' - */ -static inline u32 read_reg32(volatile u32 *const addr, - u32 const mask) -{ - u32 temp; - - __asm__ __volatile__( - " .set push \n" - " .set noreorder \n" - " lw %0, %1 # read \n" - " and %0, %2 # mask \n" - " .set pop \n" - : "=&r" (temp) - : "m" (*addr), "ir" (mask)); - - return temp; -} - -/* - * blocking_read_reg32 - Read address with blocking load - * - * Uncached writes need to be read back to ensure they reach RAM. - * The returned value must be 'used' to prevent from becoming a - * non-blocking load. - */ -static inline u32 blocking_read_reg32(volatile u32 *const addr) -{ - u32 temp; - - __asm__ __volatile__( - " .set push \n" - " .set noreorder \n" - " lw %0, %1 # read \n" - " move %0, %0 # block \n" - " .set pop \n" - : "=&r" (temp) - : "m" (*addr)); - - return temp; -} - -/* - * For special strange cases only: - * - * If you need custom processing within a ll/sc loop, use the following macros - * VERY CAREFULLY: - * - * u32 tmp; <-- Define a variable to hold the data - * - * custom_read_reg32(address, tmp); <-- Reads the address and put the value - * in the 'tmp' variable given - * - * From here on out, you are (basically) atomic, so don't do anything too - * fancy! - * Also, this code may loop if the end of this block fails to write - * everything back safely due do the other CPU, so do NOT do anything - * with side-effects! - * - * custom_write_reg32(address, tmp); <-- Writes back 'tmp' safely. - */ -#define custom_read_reg32(address, tmp) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set arch=r4000 \n" \ - "1: ll %0, %1 #custom_read_reg32 \n" \ - " .set pop \n" \ - : "=r" (tmp), "=" GCC_OFF_SMALL_ASM() (*address) \ - : GCC_OFF_SMALL_ASM() (*address)) - -#define custom_write_reg32(address, tmp) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set arch=r4000 \n" \ - " sc %0, %1 #custom_write_reg32 \n" \ - " "__beqz"%0, 1b \n" \ - " nop \n" \ - " .set pop \n" \ - : "=&r" (tmp), "=" GCC_OFF_SMALL_ASM() (*address) \ - : "0" (tmp), GCC_OFF_SMALL_ASM() (*address)) - -#endif /* __ASM_REGOPS_H__ */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regs.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regs.h deleted file mode 100644 index e2ce9be51f3f..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regs.h +++ /dev/null @@ -1,652 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Defines for the address space, registers and register configuration - * (bit masks, access macros etc) for the PMC-Sierra line of MSP products. - * This file contains addess maps for all the devices in the line of - * products but only has register definitions and configuration masks for - * registers which aren't definitely associated with any device. Things - * like clock settings, reset access, the ELB etc. Individual device - * drivers will reference the appropriate XXX_BASE value defined here - * and have individual registers offset from that. - * - * Copyright (C) 2005-2007 PMC-Sierra, Inc. All rights reserved. 
- * Author: Andrew Hughes, Andrew_Hughes@pmc-sierra.com - * - * ######################################################################## - * - * ######################################################################## - */ - -#include <asm/addrspace.h> -#include <linux/types.h> - -#ifndef _ASM_MSP_REGS_H -#define _ASM_MSP_REGS_H - -/* - ######################################################################## - # Address space and device base definitions # - ######################################################################## - */ - -/* - *************************************************************************** - * System Logic and Peripherals (ELB, UART0, etc) device address space * - *************************************************************************** - */ -#define MSP_SLP_BASE 0x1c000000 - /* System Logic and Peripherals */ -#define MSP_RST_BASE (MSP_SLP_BASE + 0x10) - /* System reset register base */ -#define MSP_RST_SIZE 0x0C /* System reset register space */ - -#define MSP_WTIMER_BASE (MSP_SLP_BASE + 0x04C) - /* watchdog timer base */ -#define MSP_ITIMER_BASE (MSP_SLP_BASE + 0x054) - /* internal timer base */ -#define MSP_UART0_BASE (MSP_SLP_BASE + 0x100) - /* UART0 controller base */ -#define MSP_BCPY_CTRL_BASE (MSP_SLP_BASE + 0x120) - /* Block Copy controller base */ -#define MSP_BCPY_DESC_BASE (MSP_SLP_BASE + 0x160) - /* Block Copy descriptor base */ - -/* - *************************************************************************** - * PCI address space * - *************************************************************************** - */ -#define MSP_PCI_BASE 0x19000000 - -/* - *************************************************************************** - * MSbus device address space * - *************************************************************************** - */ -#define MSP_MSB_BASE 0x18000000 - /* MSbus address start */ -#define MSP_PER_BASE (MSP_MSB_BASE + 0x400000) - /* Peripheral device registers */ -#define MSP_MAC0_BASE (MSP_MSB_BASE + 0x600000) - /* MAC A device registers */ -#define MSP_MAC1_BASE (MSP_MSB_BASE + 0x700000) - /* MAC B device registers */ -#define MSP_MAC_SIZE 0xE0 /* MAC register space */ - -#define MSP_SEC_BASE (MSP_MSB_BASE + 0x800000) - /* Security Engine registers */ -#define MSP_MAC2_BASE (MSP_MSB_BASE + 0x900000) - /* MAC C device registers */ -#define MSP_ADSL2_BASE (MSP_MSB_BASE + 0xA80000) - /* ADSL2 device registers */ -#define MSP_USB0_BASE (MSP_MSB_BASE + 0xB00000) - /* USB0 device registers */ -#define MSP_USB1_BASE (MSP_MSB_BASE + 0x300000) - /* USB1 device registers */ -#define MSP_CPUIF_BASE (MSP_MSB_BASE + 0xC00000) - /* CPU interface registers */ - -/* Devices within the MSbus peripheral block */ -#define MSP_UART1_BASE (MSP_PER_BASE + 0x030) - /* UART1 controller base */ -#define MSP_SPI_BASE (MSP_PER_BASE + 0x058) - /* SPI/MPI control registers */ -#define MSP_TWI_BASE (MSP_PER_BASE + 0x090) - /* Two-wire control registers */ -#define MSP_PTIMER_BASE (MSP_PER_BASE + 0x0F0) - /* Programmable timer control */ - -/* - *************************************************************************** - * Physical Memory configuration address space * - *************************************************************************** - */ -#define MSP_MEM_CFG_BASE 0x17f00000 - -#define MSP_MEM_INDIRECT_CTL_10 0x10 - -/* - * Notes: - * 1) The SPI registers are split into two blocks, one offset from the - * MSP_SPI_BASE by 0x00 and the other offset from the MSP_SPI_BASE by - * 0x68. 
The SPI driver definitions for the register must be aware - * of this. - * 2) The block copy engine register are divided into two regions, one - * for the control/configuration of the engine proper and one for the - * values of the descriptors used in the copy process. These have - * different base defines (CTRL_BASE vs DESC_BASE) - * 3) These constants are for physical addresses which means that they - * work correctly with "ioremap" and friends. This means that device - * drivers will need to remap these addresses using ioremap and perhaps - * the readw/writew macros. Or they could use the regptr() macro - * defined below, but the readw/writew calls are the correct thing. - * 4) The UARTs have an additional status register offset from the base - * address. This register isn't used in the standard 8250 driver but - * may be used in other software. Consult the hardware datasheet for - * offset details. - * 5) For some unknown reason the security engine (MSP_SEC_BASE) registers - * start at an offset of 0x84 from the base address but the block of - * registers before this is reserved for the security engine. The - * driver will have to be aware of this but it makes the register - * definitions line up better with the documentation. - */ - -/* - ######################################################################## - # System register definitions. Not associated with a specific device # - ######################################################################## - */ - -/* - * This macro maps the physical register number into uncached space - * and (for C code) casts it into a u32 pointer so it can be dereferenced - * Normally these would be accessed with ioremap and readX/writeX, but - * these are convenient for a lot of internal kernel code. - */ -#ifdef __ASSEMBLER__ - #define regptr(addr) (KSEG1ADDR(addr)) -#else - #define regptr(addr) ((volatile u32 *const)(KSEG1ADDR(addr))) -#endif - -/* - *************************************************************************** - * System Logic and Peripherals (RESET, ELB, etc) registers * - *************************************************************************** - */ - -/* System Control register definitions */ -#define DEV_ID_REG regptr(MSP_SLP_BASE + 0x00) - /* Device-ID RO */ -#define FWR_ID_REG regptr(MSP_SLP_BASE + 0x04) - /* Firmware-ID Register RW */ -#define SYS_ID_REG0 regptr(MSP_SLP_BASE + 0x08) - /* System-ID Register-0 RW */ -#define SYS_ID_REG1 regptr(MSP_SLP_BASE + 0x0C) - /* System-ID Register-1 RW */ - -/* System Reset register definitions */ -#define RST_STS_REG regptr(MSP_SLP_BASE + 0x10) - /* System Reset Status RO */ -#define RST_SET_REG regptr(MSP_SLP_BASE + 0x14) - /* System Set Reset WO */ -#define RST_CLR_REG regptr(MSP_SLP_BASE + 0x18) - /* System Clear Reset WO */ - -/* System Clock Registers */ -#define PCI_SLP_REG regptr(MSP_SLP_BASE + 0x1C) - /* PCI clock generator RW */ -#define URT_SLP_REG regptr(MSP_SLP_BASE + 0x20) - /* UART clock generator RW */ -/* reserved (MSP_SLP_BASE + 0x24) */ -/* reserved (MSP_SLP_BASE + 0x28) */ -#define PLL1_SLP_REG regptr(MSP_SLP_BASE + 0x2C) - /* PLL1 clock generator RW */ -#define PLL0_SLP_REG regptr(MSP_SLP_BASE + 0x30) - /* PLL0 clock generator RW */ -#define MIPS_SLP_REG regptr(MSP_SLP_BASE + 0x34) - /* MIPS clock generator RW */ -#define VE_SLP_REG regptr(MSP_SLP_BASE + 0x38) - /* Voice Eng clock generator RW */ -/* reserved (MSP_SLP_BASE + 0x3C) */ -#define MSB_SLP_REG regptr(MSP_SLP_BASE + 0x40) - /* MS-Bus clock generator RW */ -#define SMAC_SLP_REG regptr(MSP_SLP_BASE + 0x44) 
- /* Sec & MAC clock generator RW */ -#define PERF_SLP_REG regptr(MSP_SLP_BASE + 0x48) - /* Per & TDM clock generator RW */ - -/* Interrupt Controller Registers */ -#define SLP_INT_STS_REG regptr(MSP_SLP_BASE + 0x70) - /* Interrupt status register RW */ -#define SLP_INT_MSK_REG regptr(MSP_SLP_BASE + 0x74) - /* Interrupt enable/mask RW */ -#define SE_MBOX_REG regptr(MSP_SLP_BASE + 0x78) - /* Security Engine mailbox RW */ -#define VE_MBOX_REG regptr(MSP_SLP_BASE + 0x7C) - /* Voice Engine mailbox RW */ - -/* ELB Controller Registers */ -#define CS0_CNFG_REG regptr(MSP_SLP_BASE + 0x80) - /* ELB CS0 Configuration Reg */ -#define CS0_ADDR_REG regptr(MSP_SLP_BASE + 0x84) - /* ELB CS0 Base Address Reg */ -#define CS0_MASK_REG regptr(MSP_SLP_BASE + 0x88) - /* ELB CS0 Mask Register */ -#define CS0_ACCESS_REG regptr(MSP_SLP_BASE + 0x8C) - /* ELB CS0 access register */ - -#define CS1_CNFG_REG regptr(MSP_SLP_BASE + 0x90) - /* ELB CS1 Configuration Reg */ -#define CS1_ADDR_REG regptr(MSP_SLP_BASE + 0x94) - /* ELB CS1 Base Address Reg */ -#define CS1_MASK_REG regptr(MSP_SLP_BASE + 0x98) - /* ELB CS1 Mask Register */ -#define CS1_ACCESS_REG regptr(MSP_SLP_BASE + 0x9C) - /* ELB CS1 access register */ - -#define CS2_CNFG_REG regptr(MSP_SLP_BASE + 0xA0) - /* ELB CS2 Configuration Reg */ -#define CS2_ADDR_REG regptr(MSP_SLP_BASE + 0xA4) - /* ELB CS2 Base Address Reg */ -#define CS2_MASK_REG regptr(MSP_SLP_BASE + 0xA8) - /* ELB CS2 Mask Register */ -#define CS2_ACCESS_REG regptr(MSP_SLP_BASE + 0xAC) - /* ELB CS2 access register */ - -#define CS3_CNFG_REG regptr(MSP_SLP_BASE + 0xB0) - /* ELB CS3 Configuration Reg */ -#define CS3_ADDR_REG regptr(MSP_SLP_BASE + 0xB4) - /* ELB CS3 Base Address Reg */ -#define CS3_MASK_REG regptr(MSP_SLP_BASE + 0xB8) - /* ELB CS3 Mask Register */ -#define CS3_ACCESS_REG regptr(MSP_SLP_BASE + 0xBC) - /* ELB CS3 access register */ - -#define CS4_CNFG_REG regptr(MSP_SLP_BASE + 0xC0) - /* ELB CS4 Configuration Reg */ -#define CS4_ADDR_REG regptr(MSP_SLP_BASE + 0xC4) - /* ELB CS4 Base Address Reg */ -#define CS4_MASK_REG regptr(MSP_SLP_BASE + 0xC8) - /* ELB CS4 Mask Register */ -#define CS4_ACCESS_REG regptr(MSP_SLP_BASE + 0xCC) - /* ELB CS4 access register */ - -#define CS5_CNFG_REG regptr(MSP_SLP_BASE + 0xD0) - /* ELB CS5 Configuration Reg */ -#define CS5_ADDR_REG regptr(MSP_SLP_BASE + 0xD4) - /* ELB CS5 Base Address Reg */ -#define CS5_MASK_REG regptr(MSP_SLP_BASE + 0xD8) - /* ELB CS5 Mask Register */ -#define CS5_ACCESS_REG regptr(MSP_SLP_BASE + 0xDC) - /* ELB CS5 access register */ - -/* reserved 0xE0 - 0xE8 */ -#define ELB_1PC_EN_REG regptr(MSP_SLP_BASE + 0xEC) - /* ELB single PC card detect */ - -/* reserved 0xF0 - 0xF8 */ -#define ELB_CLK_CFG_REG regptr(MSP_SLP_BASE + 0xFC) - /* SDRAM read/ELB timing Reg */ - -/* Extended UART status registers */ -#define UART0_STATUS_REG regptr(MSP_UART0_BASE + 0x0c0) - /* UART Status Register 0 */ -#define UART1_STATUS_REG regptr(MSP_UART1_BASE + 0x170) - /* UART Status Register 1 */ - -/* Performance monitoring registers */ -#define PERF_MON_CTRL_REG regptr(MSP_SLP_BASE + 0x140) - /* Performance monitor control */ -#define PERF_MON_CLR_REG regptr(MSP_SLP_BASE + 0x144) - /* Performance monitor clear */ -#define PERF_MON_CNTH_REG regptr(MSP_SLP_BASE + 0x148) - /* Perf monitor counter high */ -#define PERF_MON_CNTL_REG regptr(MSP_SLP_BASE + 0x14C) - /* Perf monitor counter low */ - -/* System control registers */ -#define SYS_CTRL_REG regptr(MSP_SLP_BASE + 0x150) - /* System control register */ -#define SYS_ERR1_REG regptr(MSP_SLP_BASE + 0x154) 
- /* System Error status 1 */ -#define SYS_ERR2_REG regptr(MSP_SLP_BASE + 0x158) - /* System Error status 2 */ -#define SYS_INT_CFG_REG regptr(MSP_SLP_BASE + 0x15C) - /* System Interrupt config */ - -/* Voice Engine Memory configuration */ -#define VE_MEM_REG regptr(MSP_SLP_BASE + 0x17C) - /* Voice engine memory config */ - -/* CPU/SLP Error Status registers */ -#define CPU_ERR1_REG regptr(MSP_SLP_BASE + 0x180) - /* CPU/SLP Error status 1 */ -#define CPU_ERR2_REG regptr(MSP_SLP_BASE + 0x184) - /* CPU/SLP Error status 1 */ - -/* Extended GPIO registers */ -#define EXTENDED_GPIO1_REG regptr(MSP_SLP_BASE + 0x188) -#define EXTENDED_GPIO2_REG regptr(MSP_SLP_BASE + 0x18c) -#define EXTENDED_GPIO_REG EXTENDED_GPIO1_REG - /* Backward-compatibility */ - -/* System Error registers */ -#define SLP_ERR_STS_REG regptr(MSP_SLP_BASE + 0x190) - /* Int status for SLP errors */ -#define SLP_ERR_MSK_REG regptr(MSP_SLP_BASE + 0x194) - /* Int mask for SLP errors */ -#define SLP_ELB_ERST_REG regptr(MSP_SLP_BASE + 0x198) - /* External ELB reset */ -#define SLP_BOOT_STS_REG regptr(MSP_SLP_BASE + 0x19C) - /* Boot Status */ - -/* Extended ELB addressing */ -#define CS0_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1A0) - /* CS0 Extended address */ -#define CS1_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1A4) - /* CS1 Extended address */ -#define CS2_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1A8) - /* CS2 Extended address */ -#define CS3_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1AC) - /* CS3 Extended address */ -/* reserved 0x1B0 */ -#define CS5_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1B4) - /* CS5 Extended address */ - -/* PLL Adjustment registers */ -#define PLL_LOCK_REG regptr(MSP_SLP_BASE + 0x200) - /* PLL0 lock status */ -#define PLL_ARST_REG regptr(MSP_SLP_BASE + 0x204) - /* PLL Analog reset status */ -#define PLL0_ADJ_REG regptr(MSP_SLP_BASE + 0x208) - /* PLL0 Adjustment value */ -#define PLL1_ADJ_REG regptr(MSP_SLP_BASE + 0x20C) - /* PLL1 Adjustment value */ - -/* - *************************************************************************** - * Peripheral Register definitions * - *************************************************************************** - */ - -/* Peripheral status */ -#define PER_CTRL_REG regptr(MSP_PER_BASE + 0x50) - /* Peripheral control register */ -#define PER_STS_REG regptr(MSP_PER_BASE + 0x54) - /* Peripheral status register */ - -/* SPI/MPI Registers */ -#define SMPI_TX_SZ_REG regptr(MSP_PER_BASE + 0x58) - /* SPI/MPI Tx Size register */ -#define SMPI_RX_SZ_REG regptr(MSP_PER_BASE + 0x5C) - /* SPI/MPI Rx Size register */ -#define SMPI_CTL_REG regptr(MSP_PER_BASE + 0x60) - /* SPI/MPI Control register */ -#define SMPI_MS_REG regptr(MSP_PER_BASE + 0x64) - /* SPI/MPI Chip Select reg */ -#define SMPI_CORE_DATA_REG regptr(MSP_PER_BASE + 0xC0) - /* SPI/MPI Core Data reg */ -#define SMPI_CORE_CTRL_REG regptr(MSP_PER_BASE + 0xC4) - /* SPI/MPI Core Control reg */ -#define SMPI_CORE_STAT_REG regptr(MSP_PER_BASE + 0xC8) - /* SPI/MPI Core Status reg */ -#define SMPI_CORE_SSEL_REG regptr(MSP_PER_BASE + 0xCC) - /* SPI/MPI Core Ssel reg */ -#define SMPI_FIFO_REG regptr(MSP_PER_BASE + 0xD0) - /* SPI/MPI Data FIFO reg */ - -/* Peripheral Block Error Registers */ -#define PER_ERR_STS_REG regptr(MSP_PER_BASE + 0x70) - /* Error Bit Status Register */ -#define PER_ERR_MSK_REG regptr(MSP_PER_BASE + 0x74) - /* Error Bit Mask Register */ -#define PER_HDR1_REG regptr(MSP_PER_BASE + 0x78) - /* Error Header 1 Register */ -#define PER_HDR2_REG regptr(MSP_PER_BASE + 0x7C) - /* Error Header 2 Register */ - -/* Peripheral Block Interrupt 
Registers */ -#define PER_INT_STS_REG regptr(MSP_PER_BASE + 0x80) - /* Interrupt status register */ -#define PER_INT_MSK_REG regptr(MSP_PER_BASE + 0x84) - /* Interrupt Mask Register */ -#define GPIO_INT_STS_REG regptr(MSP_PER_BASE + 0x88) - /* GPIO interrupt status reg */ -#define GPIO_INT_MSK_REG regptr(MSP_PER_BASE + 0x8C) - /* GPIO interrupt MASK Reg */ - -/* POLO GPIO registers */ -#define POLO_GPIO_DAT1_REG regptr(MSP_PER_BASE + 0x0E0) - /* Polo GPIO[8:0] data reg */ -#define POLO_GPIO_CFG1_REG regptr(MSP_PER_BASE + 0x0E4) - /* Polo GPIO[7:0] config reg */ -#define POLO_GPIO_CFG2_REG regptr(MSP_PER_BASE + 0x0E8) - /* Polo GPIO[15:8] config reg */ -#define POLO_GPIO_OD1_REG regptr(MSP_PER_BASE + 0x0EC) - /* Polo GPIO[31:0] output drive */ -#define POLO_GPIO_CFG3_REG regptr(MSP_PER_BASE + 0x170) - /* Polo GPIO[23:16] config reg */ -#define POLO_GPIO_DAT2_REG regptr(MSP_PER_BASE + 0x174) - /* Polo GPIO[15:9] data reg */ -#define POLO_GPIO_DAT3_REG regptr(MSP_PER_BASE + 0x178) - /* Polo GPIO[23:16] data reg */ -#define POLO_GPIO_DAT4_REG regptr(MSP_PER_BASE + 0x17C) - /* Polo GPIO[31:24] data reg */ -#define POLO_GPIO_DAT5_REG regptr(MSP_PER_BASE + 0x180) - /* Polo GPIO[39:32] data reg */ -#define POLO_GPIO_DAT6_REG regptr(MSP_PER_BASE + 0x184) - /* Polo GPIO[47:40] data reg */ -#define POLO_GPIO_DAT7_REG regptr(MSP_PER_BASE + 0x188) - /* Polo GPIO[54:48] data reg */ -#define POLO_GPIO_CFG4_REG regptr(MSP_PER_BASE + 0x18C) - /* Polo GPIO[31:24] config reg */ -#define POLO_GPIO_CFG5_REG regptr(MSP_PER_BASE + 0x190) - /* Polo GPIO[39:32] config reg */ -#define POLO_GPIO_CFG6_REG regptr(MSP_PER_BASE + 0x194) - /* Polo GPIO[47:40] config reg */ -#define POLO_GPIO_CFG7_REG regptr(MSP_PER_BASE + 0x198) - /* Polo GPIO[54:48] config reg */ -#define POLO_GPIO_OD2_REG regptr(MSP_PER_BASE + 0x19C) - /* Polo GPIO[54:32] output drive */ - -/* Generic GPIO registers */ -#define GPIO_DATA1_REG regptr(MSP_PER_BASE + 0x170) - /* GPIO[1:0] data register */ -#define GPIO_DATA2_REG regptr(MSP_PER_BASE + 0x174) - /* GPIO[5:2] data register */ -#define GPIO_DATA3_REG regptr(MSP_PER_BASE + 0x178) - /* GPIO[9:6] data register */ -#define GPIO_DATA4_REG regptr(MSP_PER_BASE + 0x17C) - /* GPIO[15:10] data register */ -#define GPIO_CFG1_REG regptr(MSP_PER_BASE + 0x180) - /* GPIO[1:0] config register */ -#define GPIO_CFG2_REG regptr(MSP_PER_BASE + 0x184) - /* GPIO[5:2] config register */ -#define GPIO_CFG3_REG regptr(MSP_PER_BASE + 0x188) - /* GPIO[9:6] config register */ -#define GPIO_CFG4_REG regptr(MSP_PER_BASE + 0x18C) - /* GPIO[15:10] config register */ -#define GPIO_OD_REG regptr(MSP_PER_BASE + 0x190) - /* GPIO[15:0] output drive */ - -/* - *************************************************************************** - * CPU Interface register definitions * - *************************************************************************** - */ -#define PCI_FLUSH_REG regptr(MSP_CPUIF_BASE + 0x00) - /* PCI-SDRAM queue flush trigger */ -#define OCP_ERR1_REG regptr(MSP_CPUIF_BASE + 0x04) - /* OCP Error Attribute 1 */ -#define OCP_ERR2_REG regptr(MSP_CPUIF_BASE + 0x08) - /* OCP Error Attribute 2 */ -#define OCP_STS_REG regptr(MSP_CPUIF_BASE + 0x0C) - /* OCP Error Status */ -#define CPUIF_PM_REG regptr(MSP_CPUIF_BASE + 0x10) - /* CPU policy configuration */ -#define CPUIF_CFG_REG regptr(MSP_CPUIF_BASE + 0x10) - /* Misc configuration options */ - -/* Central Interrupt Controller Registers */ -#define MSP_CIC_BASE (MSP_CPUIF_BASE + 0x8000) - /* Central Interrupt registers */ -#define CIC_EXT_CFG_REG regptr(MSP_CIC_BASE + 
0x00) - /* External interrupt config */ -#define CIC_STS_REG regptr(MSP_CIC_BASE + 0x04) - /* CIC Interrupt Status */ -#define CIC_VPE0_MSK_REG regptr(MSP_CIC_BASE + 0x08) - /* VPE0 Interrupt Mask */ -#define CIC_VPE1_MSK_REG regptr(MSP_CIC_BASE + 0x0C) - /* VPE1 Interrupt Mask */ -#define CIC_TC0_MSK_REG regptr(MSP_CIC_BASE + 0x10) - /* Thread Context 0 Int Mask */ -#define CIC_TC1_MSK_REG regptr(MSP_CIC_BASE + 0x14) - /* Thread Context 1 Int Mask */ -#define CIC_TC2_MSK_REG regptr(MSP_CIC_BASE + 0x18) - /* Thread Context 2 Int Mask */ -#define CIC_TC3_MSK_REG regptr(MSP_CIC_BASE + 0x18) - /* Thread Context 3 Int Mask */ -#define CIC_TC4_MSK_REG regptr(MSP_CIC_BASE + 0x18) - /* Thread Context 4 Int Mask */ -#define CIC_PCIMSI_STS_REG regptr(MSP_CIC_BASE + 0x18) -#define CIC_PCIMSI_MSK_REG regptr(MSP_CIC_BASE + 0x18) -#define CIC_PCIFLSH_REG regptr(MSP_CIC_BASE + 0x18) -#define CIC_VPE0_SWINT_REG regptr(MSP_CIC_BASE + 0x08) - - -/* - *************************************************************************** - * Memory controller registers * - *************************************************************************** - */ -#define MEM_CFG1_REG regptr(MSP_MEM_CFG_BASE + 0x00) -#define MEM_SS_ADDR regptr(MSP_MEM_CFG_BASE + 0x00) -#define MEM_SS_DATA regptr(MSP_MEM_CFG_BASE + 0x04) -#define MEM_SS_WRITE regptr(MSP_MEM_CFG_BASE + 0x08) - -/* - *************************************************************************** - * PCI controller registers * - *************************************************************************** - */ -#define PCI_BASE_REG regptr(MSP_PCI_BASE + 0x00) -#define PCI_CONFIG_SPACE_REG regptr(MSP_PCI_BASE + 0x800) -#define PCI_JTAG_DEVID_REG regptr(MSP_SLP_BASE + 0x13c) - -/* - ######################################################################## - # Register content & macro definitions # - ######################################################################## - */ - -/* - *************************************************************************** - * DEV_ID defines * - *************************************************************************** - */ -#define DEV_ID_PCI_DIS (1 << 26) /* Set if PCI disabled */ -#define DEV_ID_PCI_HOST (1 << 20) /* Set if PCI host */ -#define DEV_ID_SINGLE_PC (1 << 19) /* Set if single PC Card */ -#define DEV_ID_FAMILY (0xff << 8) /* family ID code */ -#define POLO_ZEUS_SUB_FAMILY (0x7 << 16) /* sub family for Polo/Zeus */ - -#define MSPFPGA_ID (0x00 << 8) /* you are on your own here */ -#define MSP5000_ID (0x50 << 8) -#define MSP4F00_ID (0x4f << 8) /* FPGA version of MSP4200 */ -#define MSP4E00_ID (0x4f << 8) /* FPGA version of MSP7120 */ -#define MSP4200_ID (0x42 << 8) -#define MSP4000_ID (0x40 << 8) -#define MSP2XXX_ID (0x20 << 8) -#define MSPZEUS_ID (0x10 << 8) - -#define MSP2004_SUB_ID (0x0 << 16) -#define MSP2005_SUB_ID (0x1 << 16) -#define MSP2006_SUB_ID (0x1 << 16) -#define MSP2007_SUB_ID (0x2 << 16) -#define MSP2010_SUB_ID (0x3 << 16) -#define MSP2015_SUB_ID (0x4 << 16) -#define MSP2020_SUB_ID (0x5 << 16) -#define MSP2100_SUB_ID (0x6 << 16) - -/* - *************************************************************************** - * RESET defines * - *************************************************************************** - */ -#define MSP_GR_RST (0x01 << 0) /* Global reset bit */ -#define MSP_MR_RST (0x01 << 1) /* MIPS reset bit */ -#define MSP_PD_RST (0x01 << 2) /* PVC DMA reset bit */ -#define MSP_PP_RST (0x01 << 3) /* PVC reset bit */ -/* reserved */ -#define MSP_EA_RST (0x01 << 6) /* Mac A reset bit */ -#define MSP_EB_RST 
(0x01 << 7) /* Mac B reset bit */ -#define MSP_SE_RST (0x01 << 8) /* Security Eng reset bit */ -#define MSP_PB_RST (0x01 << 9) /* Per block reset bit */ -#define MSP_EC_RST (0x01 << 10) /* Mac C reset bit */ -#define MSP_TW_RST (0x01 << 11) /* TWI reset bit */ -#define MSP_SPI_RST (0x01 << 12) /* SPI/MPI reset bit */ -#define MSP_U1_RST (0x01 << 13) /* UART1 reset bit */ -#define MSP_U0_RST (0x01 << 14) /* UART0 reset bit */ - -/* - *************************************************************************** - * UART defines * - *************************************************************************** - */ -#define MSP_BASE_BAUD 25000000 -#define MSP_UART_REG_LEN 0x20 - -/* - *************************************************************************** - * ELB defines * - *************************************************************************** - */ -#define PCCARD_32 0x02 /* Set if is PCCARD 32 (Cardbus) */ -#define SINGLE_PCCARD 0x01 /* Set to enable single PC card */ - -/* - *************************************************************************** - * CIC defines * - *************************************************************************** - */ - -/* CIC_EXT_CFG_REG */ -#define EXT_INT_POL(eirq) (1 << (eirq + 8)) -#define EXT_INT_EDGE(eirq) (1 << eirq) - -#define CIC_EXT_SET_TRIGGER_LEVEL(reg, eirq) (reg &= ~EXT_INT_EDGE(eirq)) -#define CIC_EXT_SET_TRIGGER_EDGE(reg, eirq) (reg |= EXT_INT_EDGE(eirq)) -#define CIC_EXT_SET_ACTIVE_HI(reg, eirq) (reg |= EXT_INT_POL(eirq)) -#define CIC_EXT_SET_ACTIVE_LO(reg, eirq) (reg &= ~EXT_INT_POL(eirq)) -#define CIC_EXT_SET_ACTIVE_RISING CIC_EXT_SET_ACTIVE_HI -#define CIC_EXT_SET_ACTIVE_FALLING CIC_EXT_SET_ACTIVE_LO - -#define CIC_EXT_IS_TRIGGER_LEVEL(reg, eirq) \ - ((reg & EXT_INT_EDGE(eirq)) == 0) -#define CIC_EXT_IS_TRIGGER_EDGE(reg, eirq) (reg & EXT_INT_EDGE(eirq)) -#define CIC_EXT_IS_ACTIVE_HI(reg, eirq) (reg & EXT_INT_POL(eirq)) -#define CIC_EXT_IS_ACTIVE_LO(reg, eirq) \ - ((reg & EXT_INT_POL(eirq)) == 0) -#define CIC_EXT_IS_ACTIVE_RISING CIC_EXT_IS_ACTIVE_HI -#define CIC_EXT_IS_ACTIVE_FALLING CIC_EXT_IS_ACTIVE_LO - -/* - *************************************************************************** - * Memory Controller defines * - *************************************************************************** - */ - -/* Indirect memory controller registers */ -#define DDRC_CFG(n) (n) -#define DDRC_DEBUG(n) (0x04 + n) -#define DDRC_CTL(n) (0x40 + n) - -/* Macro to perform DDRC indirect write */ -#define DDRC_INDIRECT_WRITE(reg, mask, value) \ -({ \ - *MEM_SS_ADDR = (((mask) & 0xf) << 8) | ((reg) & 0xff); \ - *MEM_SS_DATA = (value); \ - *MEM_SS_WRITE = 1; \ -}) - -/* - *************************************************************************** - * SPI/MPI Mode * - *************************************************************************** - */ -#define SPI_MPI_RX_BUSY 0x00008000 /* SPI/MPI Receive Busy */ -#define SPI_MPI_FIFO_EMPTY 0x00004000 /* SPI/MPI Fifo Empty */ -#define SPI_MPI_TX_BUSY 0x00002000 /* SPI/MPI Transmit Busy */ -#define SPI_MPI_FIFO_FULL 0x00001000 /* SPI/MPU FIFO full */ - -/* - *************************************************************************** - * SPI/MPI Control Register * - *************************************************************************** - */ -#define SPI_MPI_RX_START 0x00000004 /* Start receive command */ -#define SPI_MPI_FLUSH_Q 0x00000002 /* Flush SPI/MPI Queue */ -#define SPI_MPI_TX_START 0x00000001 /* Start Transmit Command */ - -#endif /* !_ASM_MSP_REGS_H */ diff --git 
a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_slp_int.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_slp_int.h deleted file mode 100644 index 9a763eb5e5f5..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_slp_int.h +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Defines for the MSP interrupt controller. - * - * Copyright (C) 1999 MIPS Technologies, Inc. All rights reserved. - * Author: Carsten Langgaard, carstenl@mips.com - * - * ######################################################################## - * - * ######################################################################## - */ - -#ifndef _MSP_SLP_INT_H -#define _MSP_SLP_INT_H - -/* - * The PMC-Sierra SLP interrupts are arranged in a 3 level cascaded - * hierarchical system. The first level are the direct MIPS interrupts - * and are assigned the interrupt range 0-7. The second level is the SLM - * interrupt controller and is assigned the range 8-39. The third level - * comprises the Peripherial block, the PCI block, the PCI MSI block and - * the SLP. The PCI interrupts and the SLP errors are handled by the - * relevant subsystems so the core interrupt code needs only concern - * itself with the Peripheral block. These are assigned interrupts in - * the range 40-71. - */ - -/* - * IRQs directly connected to CPU - */ -#define MSP_MIPS_INTBASE 0 -#define MSP_INT_SW0 0 /* IRQ for swint0, C_SW0 */ -#define MSP_INT_SW1 1 /* IRQ for swint1, C_SW1 */ -#define MSP_INT_MAC0 2 /* IRQ for MAC 0, C_IRQ0 */ -#define MSP_INT_MAC1 3 /* IRQ for MAC 1, C_IRQ1 */ -#define MSP_INT_C_IRQ2 4 /* Wired off, C_IRQ2 */ -#define MSP_INT_VE 5 /* IRQ for Voice Engine, C_IRQ3 */ -#define MSP_INT_SLP 6 /* IRQ for SLM block, C_IRQ4 */ -#define MSP_INT_TIMER 7 /* IRQ for the MIPS timer, C_IRQ5 */ - -/* - * IRQs cascaded on CPU interrupt 4 (CAUSE bit 12, C_IRQ4) - * These defines should be tied to the register definition for the SLM - * interrupt routine. For now, just use hard-coded values. - */ -#define MSP_SLP_INTBASE (MSP_MIPS_INTBASE + 8) -#define MSP_INT_EXT0 (MSP_SLP_INTBASE + 0) - /* External interrupt 0 */ -#define MSP_INT_EXT1 (MSP_SLP_INTBASE + 1) - /* External interrupt 1 */ -#define MSP_INT_EXT2 (MSP_SLP_INTBASE + 2) - /* External interrupt 2 */ -#define MSP_INT_EXT3 (MSP_SLP_INTBASE + 3) - /* External interrupt 3 */ -/* Reserved 4-7 */ - -/* - ************************************************************************* - * DANGER/DANGER/DANGER/DANGER/DANGER/DANGER/DANGER/DANGER/DANGER/DANGER * - * Some MSP produces have this interrupt labelled as Voice and some are * - * SEC mbox ... 
* - ************************************************************************* - */ -#define MSP_INT_SLP_VE (MSP_SLP_INTBASE + 8) - /* Cascaded IRQ for Voice Engine*/ -#define MSP_INT_SLP_TDM (MSP_SLP_INTBASE + 9) - /* TDM interrupt */ -#define MSP_INT_SLP_MAC0 (MSP_SLP_INTBASE + 10) - /* Cascaded IRQ for MAC 0 */ -#define MSP_INT_SLP_MAC1 (MSP_SLP_INTBASE + 11) - /* Cascaded IRQ for MAC 1 */ -#define MSP_INT_SEC (MSP_SLP_INTBASE + 12) - /* IRQ for security engine */ -#define MSP_INT_PER (MSP_SLP_INTBASE + 13) - /* Peripheral interrupt */ -#define MSP_INT_TIMER0 (MSP_SLP_INTBASE + 14) - /* SLP timer 0 */ -#define MSP_INT_TIMER1 (MSP_SLP_INTBASE + 15) - /* SLP timer 1 */ -#define MSP_INT_TIMER2 (MSP_SLP_INTBASE + 16) - /* SLP timer 2 */ -#define MSP_INT_SLP_TIMER (MSP_SLP_INTBASE + 17) - /* Cascaded MIPS timer */ -#define MSP_INT_BLKCP (MSP_SLP_INTBASE + 18) - /* Block Copy */ -#define MSP_INT_UART0 (MSP_SLP_INTBASE + 19) - /* UART 0 */ -#define MSP_INT_PCI (MSP_SLP_INTBASE + 20) - /* PCI subsystem */ -#define MSP_INT_PCI_DBELL (MSP_SLP_INTBASE + 21) - /* PCI doorbell */ -#define MSP_INT_PCI_MSI (MSP_SLP_INTBASE + 22) - /* PCI Message Signal */ -#define MSP_INT_PCI_BC0 (MSP_SLP_INTBASE + 23) - /* PCI Block Copy 0 */ -#define MSP_INT_PCI_BC1 (MSP_SLP_INTBASE + 24) - /* PCI Block Copy 1 */ -#define MSP_INT_SLP_ERR (MSP_SLP_INTBASE + 25) - /* SLP error condition */ -#define MSP_INT_MAC2 (MSP_SLP_INTBASE + 26) - /* IRQ for MAC2 */ -/* Reserved 26-31 */ - -/* - * IRQs cascaded on SLP PER interrupt (MSP_INT_PER) - */ -#define MSP_PER_INTBASE (MSP_SLP_INTBASE + 32) -/* Reserved 0-1 */ -#define MSP_INT_UART1 (MSP_PER_INTBASE + 2) - /* UART 1 */ -/* Reserved 3-5 */ -#define MSP_INT_2WIRE (MSP_PER_INTBASE + 6) - /* 2-wire */ -#define MSP_INT_TM0 (MSP_PER_INTBASE + 7) - /* Peripheral timer block out 0 */ -#define MSP_INT_TM1 (MSP_PER_INTBASE + 8) - /* Peripheral timer block out 1 */ -/* Reserved 9 */ -#define MSP_INT_SPRX (MSP_PER_INTBASE + 10) - /* SPI RX complete */ -#define MSP_INT_SPTX (MSP_PER_INTBASE + 11) - /* SPI TX complete */ -#define MSP_INT_GPIO (MSP_PER_INTBASE + 12) - /* GPIO */ -#define MSP_INT_PER_ERR (MSP_PER_INTBASE + 13) - /* Peripheral error */ -/* Reserved 14-31 */ - -#endif /* !_MSP_SLP_INT_H */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h deleted file mode 100644 index 3cc3edb336b6..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h +++ /dev/null @@ -1,124 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/****************************************************************** - * Copyright (c) 2000-2007 PMC-Sierra INC. - * - * PMC-SIERRA INC. DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS - * SOFTWARE. 
- */ -#ifndef MSP_USB_H_ -#define MSP_USB_H_ - -#define NUM_USB_DEVS 1 - -/* Register spaces for USB host 0 */ -#define MSP_USB0_MAB_START (MSP_USB0_BASE + 0x0) -#define MSP_USB0_MAB_END (MSP_USB0_BASE + 0x17) -#define MSP_USB0_ID_START (MSP_USB0_BASE + 0x40000) -#define MSP_USB0_ID_END (MSP_USB0_BASE + 0x4008f) -#define MSP_USB0_HS_START (MSP_USB0_BASE + 0x40100) -#define MSP_USB0_HS_END (MSP_USB0_BASE + 0x401FF) - -/* Register spaces for USB host 1 */ -#define MSP_USB1_MAB_START (MSP_USB1_BASE + 0x0) -#define MSP_USB1_MAB_END (MSP_USB1_BASE + 0x17) -#define MSP_USB1_ID_START (MSP_USB1_BASE + 0x40000) -#define MSP_USB1_ID_END (MSP_USB1_BASE + 0x4008f) -#define MSP_USB1_HS_START (MSP_USB1_BASE + 0x40100) -#define MSP_USB1_HS_END (MSP_USB1_BASE + 0x401ff) - -/* USB Identification registers */ -struct msp_usbid_regs { - u32 id; /* 0x0: Identification register */ - u32 hwgen; /* 0x4: General HW params */ - u32 hwhost; /* 0x8: Host HW params */ - u32 hwdev; /* 0xc: Device HW params */ - u32 hwtxbuf; /* 0x10: Tx buffer HW params */ - u32 hwrxbuf; /* 0x14: Rx buffer HW params */ - u32 reserved[26]; - u32 timer0_load; /* 0x80: General-purpose timer 0 load*/ - u32 timer0_ctrl; /* 0x84: General-purpose timer 0 control */ - u32 timer1_load; /* 0x88: General-purpose timer 1 load*/ - u32 timer1_ctrl; /* 0x8c: General-purpose timer 1 control */ -}; - -/* MSBus to AMBA registers */ -struct msp_mab_regs { - u32 isr; /* 0x0: Interrupt status */ - u32 imr; /* 0x4: Interrupt mask */ - u32 thcr0; /* 0x8: Transaction header capture 0 */ - u32 thcr1; /* 0xc: Transaction header capture 1 */ - u32 int_stat; /* 0x10: Interrupt status summary */ - u32 phy_cfg; /* 0x14: USB phy config */ -}; - -/* EHCI registers */ -struct msp_usbhs_regs { - u32 hciver; /* 0x0: Version and offset to operational regs */ - u32 hcsparams; /* 0x4: Host control structural parameters */ - u32 hccparams; /* 0x8: Host control capability parameters */ - u32 reserved0[5]; - u32 dciver; /* 0x20: Device interface version */ - u32 dccparams; /* 0x24: Device control capability parameters */ - u32 reserved1[6]; - u32 cmd; /* 0x40: USB command */ - u32 sts; /* 0x44: USB status */ - u32 int_ena; /* 0x48: USB interrupt enable */ - u32 frindex; /* 0x4c: Frame index */ - u32 reserved3; - union { - struct { - u32 flb_addr; /* 0x54: Frame list base address */ - u32 next_async_addr; /* 0x58: next asynchronous addr */ - u32 ttctrl; /* 0x5c: embedded transaction translator - async buffer status */ - u32 burst_size; /* 0x60: Controller burst size */ - u32 tx_fifo_ctrl; /* 0x64: Tx latency FIFO tuning */ - u32 reserved0[4]; - u32 endpt_nak; /* 0x78: Endpoint NAK */ - u32 endpt_nak_ena; /* 0x7c: Endpoint NAK enable */ - u32 cfg_flag; /* 0x80: Config flag */ - u32 port_sc1; /* 0x84: Port status & control 1 */ - u32 reserved1[7]; - u32 otgsc; /* 0xa4: OTG status & control */ - u32 mode; /* 0xa8: USB controller mode */ - } host; - - struct { - u32 dev_addr; /* 0x54: Device address */ - u32 endpt_list_addr; /* 0x58: Endpoint list address */ - u32 reserved0[7]; - u32 endpt_nak; /* 0x74 */ - u32 endpt_nak_ctrl; /* 0x78 */ - u32 cfg_flag; /* 0x80 */ - u32 port_sc1; /* 0x84: Port status & control 1 */ - u32 reserved[7]; - u32 otgsc; /* 0xa4: OTG status & control */ - u32 mode; /* 0xa8: USB controller mode */ - u32 endpt_setup_stat; /* 0xac */ - u32 endpt_prime; /* 0xb0 */ - u32 endpt_flush; /* 0xb4 */ - u32 endpt_stat; /* 0xb8 */ - u32 endpt_complete; /* 0xbc */ - u32 endpt_ctrl0; /* 0xc0 */ - u32 endpt_ctrl1; /* 0xc4 */ - u32 endpt_ctrl2; /* 0xc8 */ - u32 
endpt_ctrl3; /* 0xcc */ - } device; - } u; -}; -/* - * Container for the more-generic platform_device. - * This exists mainly as a way to map the non-standard register - * spaces and make them accessible to the USB ISR. - */ -struct mspusb_device { - struct msp_mab_regs __iomem *mab_regs; - struct msp_usbid_regs __iomem *usbid_regs; - struct msp_usbhs_regs __iomem *usbhs_regs; - struct platform_device dev; -}; - -#define to_mspusb_device(x) container_of((x), struct mspusb_device, dev) -#define TO_HOST_ID(x) ((x) & 0x3) -#endif /*MSP_USB_H_*/ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/war.h b/arch/mips/include/asm/mach-pmcs-msp71xx/war.h deleted file mode 100644 index 31c546f58bb5..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/war.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org> - */ -#ifndef __ASM_MIPS_PMC_SIERRA_WAR_H -#define __ASM_MIPS_PMC_SIERRA_WAR_H - -#define R4600_V1_INDEX_ICACHEOP_WAR 0 -#define R4600_V1_HIT_CACHEOP_WAR 0 -#define R4600_V2_HIT_CACHEOP_WAR 0 -#define BCM1250_M3_WAR 0 -#define SIBYTE_1956_WAR 0 -#define MIPS4K_ICACHE_REFILL_WAR 0 -#define MIPS_CACHE_SYNC_WAR 0 -#define TX49XX_ICACHE_INDEX_INV_WAR 0 -#define ICACHE_REFILLS_WORKAROUND_WAR 0 -#define R10000_LLSC_WAR 0 -#if defined(CONFIG_PMC_MSP7120_EVAL) || defined(CONFIG_PMC_MSP7120_GW) || \ - defined(CONFIG_PMC_MSP7120_FPGA) -#define MIPS34K_MISSED_ITLB_WAR 1 -#else -#define MIPS34K_MISSED_ITLB_WAR 0 -#endif - -#endif /* __ASM_MIPS_PMC_SIERRA_WAR_H */ diff --git a/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h index 6ea5908f0c11..c4579f1705c2 100644 --- a/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h @@ -45,7 +45,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 diff --git a/arch/mips/include/asm/mach-ralink/mt7621.h b/arch/mips/include/asm/mach-ralink/mt7621.h index 65483a4681ab..e1af1ba50bd8 100644 --- a/arch/mips/include/asm/mach-ralink/mt7621.h +++ b/arch/mips/include/asm/mach-ralink/mt7621.h @@ -31,6 +31,4 @@ #define MT7621_CHIP_NAME0 0x3637544D #define MT7621_CHIP_NAME1 0x20203132 -#define MIPS_GIC_IRQ_BASE (MIPS_CPU_IRQ_BASE + 8) - #endif diff --git a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h index e06f517b2588..168359a0a58d 100644 --- a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h @@ -46,7 +46,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 diff --git a/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h index 9c069646d0bd..fdaf8c9182bc 100644 --- a/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h @@ -44,7 +44,6 @@ #define cpu_has_64bits 0 
#define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 16 #define cpu_icache_line_size() 16 diff --git a/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h index 2e423fd15384..7a385fe784a6 100644 --- a/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h @@ -44,7 +44,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 diff --git a/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h index 7cee0e232580..0a61910f6521 100644 --- a/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h @@ -43,7 +43,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 diff --git a/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h b/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h index bc46179fdf40..8539ccfb69b7 100644 --- a/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h @@ -54,7 +54,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_has_inclusive_pcaches 0 diff --git a/arch/mips/include/asm/mach-rc32434/pci.h b/arch/mips/include/asm/mach-rc32434/pci.h index 6f40d1515580..9a6eefd12757 100644 --- a/arch/mips/include/asm/mach-rc32434/pci.h +++ b/arch/mips/include/asm/mach-rc32434/pci.h @@ -319,9 +319,6 @@ struct pci_msu { #define PCIM_H_EA 0x3 #define PCIM_H_IA_FIX 0x4 #define PCIM_H_IA_RR 0x5 -#if 0 -#define PCI_ADDR_START 0x13000000 -#endif #define PCI_ADDR_START 0x50000000 diff --git a/arch/mips/include/asm/mach-tx39xx/ioremap.h b/arch/mips/include/asm/mach-tx39xx/ioremap.h index 077b3c9971f7..157a7292397e 100644 --- a/arch/mips/include/asm/mach-tx39xx/ioremap.h +++ b/arch/mips/include/asm/mach-tx39xx/ioremap.h @@ -7,15 +7,6 @@ #include <linux/types.h> -/* - * Allow physical addresses to be fixed up to help peripherals located - * outside the low 32-bit range -- generic pass-through version. - */ -static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return phys_addr; -} - static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, unsigned long flags) { diff --git a/arch/mips/include/asm/mach-tx49xx/ioremap.h b/arch/mips/include/asm/mach-tx49xx/ioremap.h index c6b9e05f44c4..b1f3710acf8e 100644 --- a/arch/mips/include/asm/mach-tx49xx/ioremap.h +++ b/arch/mips/include/asm/mach-tx49xx/ioremap.h @@ -7,15 +7,6 @@ #include <linux/types.h> -/* - * Allow physical addresses to be fixed up to help peripherals located - * outside the low 32-bit range -- generic pass-through version. 
- */ -static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return phys_addr; -} - static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, unsigned long flags) { diff --git a/arch/mips/include/asm/mach-xilfpga/irq.h b/arch/mips/include/asm/mach-xilfpga/irq.h deleted file mode 100644 index 15ad29ec1dee..000000000000 --- a/arch/mips/include/asm/mach-xilfpga/irq.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2015 Imagination Technologies - * Author: Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com> - */ - -#ifndef __MIPS_ASM_MACH_XILFPGA_IRQ_H__ -#define __MIPS_ASM_MACH_XILFPGA_IRQ_H__ - -#define NR_IRQS 32 - -#include <asm/mach-generic/irq.h> - -#endif /* __MIPS_ASM_MACH_XILFPGA_IRQ_H__ */ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 796fe47cfd17..796dbb86575b 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -468,6 +468,7 @@ #define EXCCODE_THREAD 25 /* Thread exceptions (MT) */ #define EXCCODE_DSPDIS 26 /* DSP disabled exception */ #define EXCCODE_GE 27 /* Virtualized guest exception (VZ) */ +#define EXCCODE_CACHEERR 30 /* Parity/ECC occured on a core */ /* Implementation specific trap codes used by MIPS cores */ #define MIPS_EXCCODE_TLBPAR 16 /* TLB parity error exception */ @@ -563,6 +564,17 @@ #define MIPS_CONF_MT_FTLB (_ULCAST_(4) << 7) #define MIPS_CONF_AR (_ULCAST_(7) << 10) #define MIPS_CONF_AT (_ULCAST_(3) << 13) +#define MIPS_CONF_BE (_ULCAST_(1) << 15) +#define MIPS_CONF_BM (_ULCAST_(1) << 16) +#define MIPS_CONF_MM (_ULCAST_(3) << 17) +#define MIPS_CONF_MM_SYSAD (_ULCAST_(1) << 17) +#define MIPS_CONF_MM_FULL (_ULCAST_(2) << 17) +#define MIPS_CONF_SB (_ULCAST_(1) << 21) +#define MIPS_CONF_UDI (_ULCAST_(1) << 22) +#define MIPS_CONF_DSP (_ULCAST_(1) << 23) +#define MIPS_CONF_ISP (_ULCAST_(1) << 24) +#define MIPS_CONF_KU (_ULCAST_(3) << 25) +#define MIPS_CONF_K23 (_ULCAST_(3) << 28) #define MIPS_CONF_M (_ULCAST_(1) << 31) /* @@ -674,13 +686,38 @@ #define MIPS_CONF5_CV (_ULCAST_(1) << 29) #define MIPS_CONF5_K (_ULCAST_(1) << 30) -#define MIPS_CONF6_SYND (_ULCAST_(1) << 13) +/* Config6 feature bits for proAptiv/P5600 */ + +/* Jump register cache prediction disable */ +#define MIPS_CONF6_MTI_JRCD (_ULCAST_(1) << 0) +/* MIPSr6 extensions enable */ +#define MIPS_CONF6_MTI_R6 (_ULCAST_(1) << 2) +/* IFU Performance Control */ +#define MIPS_CONF6_MTI_IFUPERFCTL (_ULCAST_(3) << 10) +#define MIPS_CONF6_MTI_SYND (_ULCAST_(1) << 13) +/* Sleep state performance counter disable */ +#define MIPS_CONF6_MTI_SPCD (_ULCAST_(1) << 14) /* proAptiv FTLB on/off bit */ -#define MIPS_CONF6_FTLBEN (_ULCAST_(1) << 15) -/* Loongson-3 FTLB on/off bit */ -#define MIPS_CONF6_FTLBDIS (_ULCAST_(1) << 22) +#define MIPS_CONF6_MTI_FTLBEN (_ULCAST_(1) << 15) +/* Disable load/store bonding */ +#define MIPS_CONF6_MTI_DLSB (_ULCAST_(1) << 21) /* FTLB probability bits */ -#define MIPS_CONF6_FTLBP_SHIFT (16) +#define MIPS_CONF6_MTI_FTLBP_SHIFT (16) + +/* Config6 feature bits for Loongson-3 */ + +/* Loongson-3 internal timer bit */ +#define MIPS_CONF6_LOONGSON_INTIMER (_ULCAST_(1) << 6) +/* Loongson-3 external timer bit */ +#define MIPS_CONF6_LOONGSON_EXTIMER (_ULCAST_(1) << 7) +/* Loongson-3 SFB on/off bit, STFill in manual */ +#define MIPS_CONF6_LOONGSON_SFBEN (_ULCAST_(1) << 8) +/* Loongson-3's LL on exclusive cacheline */ +#define MIPS_CONF6_LOONGSON_LLEXC (_ULCAST_(1) << 16) +/* Loongson-3's SC has a random delay */ 
+#define MIPS_CONF6_LOONGSON_SCRAND (_ULCAST_(1) << 17) +/* Loongson-3 FTLB on/off bit, VTLBOnly in manual */ +#define MIPS_CONF6_LOONGSON_FTLBDIS (_ULCAST_(1) << 22) #define MIPS_CONF7_WII (_ULCAST_(1) << 31) @@ -753,10 +790,18 @@ /* MAAR bit definitions */ #define MIPS_MAAR_VH (_U64CAST_(1) << 63) -#define MIPS_MAAR_ADDR ((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12) +#define MIPS_MAAR_ADDR GENMASK_ULL(55, 12) #define MIPS_MAAR_ADDR_SHIFT 12 #define MIPS_MAAR_S (_ULCAST_(1) << 1) #define MIPS_MAAR_VL (_ULCAST_(1) << 0) +#ifdef CONFIG_XPA +#define MIPS_MAAR_V (MIPS_MAAR_VH | MIPS_MAAR_VL) +#else +#define MIPS_MAAR_V MIPS_MAAR_VL +#endif +#define MIPS_MAARX_VH (_ULCAST_(1) << 31) +#define MIPS_MAARX_ADDR 0xF +#define MIPS_MAARX_ADDR_SHIFT 32 /* MAARI bit definitions */ #define MIPS_MAARI_INDEX (_ULCAST_(0x3f) << 0) @@ -997,6 +1042,8 @@ #define LOONGSON_DIAG_ITLB (_ULCAST_(1) << 2) /* Flush DTLB */ #define LOONGSON_DIAG_DTLB (_ULCAST_(1) << 3) +/* Allow some CACHE instructions (CACHE0, 1, 3, 21 and 23) in user mode */ +#define LOONGSON_DIAG_UCAC (_ULCAST_(1) << 8) /* Flush VTLB */ #define LOONGSON_DIAG_VTLB (_ULCAST_(1) << 12) /* Flush FTLB */ @@ -1717,6 +1764,8 @@ do { \ #define write_c0_lladdr(val) __write_ulong_c0_register($17, 0, val) #define read_c0_maar() __read_ulong_c0_register($17, 1) #define write_c0_maar(val) __write_ulong_c0_register($17, 1, val) +#define readx_c0_maar() __readx_32bit_c0_register($17, 1) +#define writex_c0_maar(val) __writex_32bit_c0_register($17, 1, val) #define read_c0_maari() __read_32bit_c0_register($17, 2) #define write_c0_maari(val) __write_32bit_c0_register($17, 2, val) diff --git a/arch/mips/include/asm/nile4.h b/arch/mips/include/asm/nile4.h deleted file mode 100644 index 9d36b7823603..000000000000 --- a/arch/mips/include/asm/nile4.h +++ /dev/null @@ -1,310 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * asm-mips/nile4.h -- NEC Vrc-5074 Nile 4 definitions - * - * Copyright (C) 2000 Geert Uytterhoeven <geert@linux-m68k.org> - * Sony Software Development Center Europe (SDCE), Brussels - * - * This file is based on the following documentation: - * - * NEC Vrc 5074 System Controller Data Sheet, June 1998 - */ - -#ifndef _ASM_NILE4_H -#define _ASM_NILE4_H - -#define NILE4_BASE 0xbfa00000 -#define NILE4_SIZE 0x00200000 /* 2 MB */ - - - /* - * Physical Device Address Registers (PDARs) - */ - -#define NILE4_SDRAM0 0x0000 /* SDRAM Bank 0 [R/W] */ -#define NILE4_SDRAM1 0x0008 /* SDRAM Bank 1 [R/W] */ -#define NILE4_DCS2 0x0010 /* Device Chip-Select 2 [R/W] */ -#define NILE4_DCS3 0x0018 /* Device Chip-Select 3 [R/W] */ -#define NILE4_DCS4 0x0020 /* Device Chip-Select 4 [R/W] */ -#define NILE4_DCS5 0x0028 /* Device Chip-Select 5 [R/W] */ -#define NILE4_DCS6 0x0030 /* Device Chip-Select 6 [R/W] */ -#define NILE4_DCS7 0x0038 /* Device Chip-Select 7 [R/W] */ -#define NILE4_DCS8 0x0040 /* Device Chip-Select 8 [R/W] */ -#define NILE4_PCIW0 0x0060 /* PCI Address Window 0 [R/W] */ -#define NILE4_PCIW1 0x0068 /* PCI Address Window 1 [R/W] */ -#define NILE4_INTCS 0x0070 /* Controller Internal Registers and Devices */ - /* [R/W] */ -#define NILE4_BOOTCS 0x0078 /* Boot ROM Chip-Select [R/W] */ - - - /* - * CPU Interface Registers - */ - -#define NILE4_CPUSTAT 0x0080 /* CPU Status [R/W] */ -#define NILE4_INTCTRL 0x0088 /* Interrupt Control [R/W] */ -#define NILE4_INTSTAT0 0x0090 /* Interrupt Status 0 [R] */ -#define NILE4_INTSTAT1 0x0098 /* Interrupt Status 1 and CPU Interrupt */ - /* Enable [R/W] */ -#define NILE4_INTCLR 0x00A0 /* Interrupt Clear [R/W] */ -#define 
NILE4_INTPPES 0x00A8 /* PCI Interrupt Control [R/W] */ - - - /* - * Memory-Interface Registers - */ - -#define NILE4_MEMCTRL 0x00C0 /* Memory Control */ -#define NILE4_ACSTIME 0x00C8 /* Memory Access Timing [R/W] */ -#define NILE4_CHKERR 0x00D0 /* Memory Check Error Status [R] */ - - - /* - * PCI-Bus Registers - */ - -#define NILE4_PCICTRL 0x00E0 /* PCI Control [R/W] */ -#define NILE4_PCIARB 0x00E8 /* PCI Arbiter [R/W] */ -#define NILE4_PCIINIT0 0x00F0 /* PCI Master (Initiator) 0 [R/W] */ -#define NILE4_PCIINIT1 0x00F8 /* PCI Master (Initiator) 1 [R/W] */ -#define NILE4_PCIERR 0x00B8 /* PCI Error [R/W] */ - - - /* - * Local-Bus Registers - */ - -#define NILE4_LCNFG 0x0100 /* Local Bus Configuration [R/W] */ -#define NILE4_LCST2 0x0110 /* Local Bus Chip-Select Timing 2 [R/W] */ -#define NILE4_LCST3 0x0118 /* Local Bus Chip-Select Timing 3 [R/W] */ -#define NILE4_LCST4 0x0120 /* Local Bus Chip-Select Timing 4 [R/W] */ -#define NILE4_LCST5 0x0128 /* Local Bus Chip-Select Timing 5 [R/W] */ -#define NILE4_LCST6 0x0130 /* Local Bus Chip-Select Timing 6 [R/W] */ -#define NILE4_LCST7 0x0138 /* Local Bus Chip-Select Timing 7 [R/W] */ -#define NILE4_LCST8 0x0140 /* Local Bus Chip-Select Timing 8 [R/W] */ -#define NILE4_DCSFN 0x0150 /* Device Chip-Select Muxing and Output */ - /* Enables [R/W] */ -#define NILE4_DCSIO 0x0158 /* Device Chip-Selects As I/O Bits [R/W] */ -#define NILE4_BCST 0x0178 /* Local Boot Chip-Select Timing [R/W] */ - - - /* - * DMA Registers - */ - -#define NILE4_DMACTRL0 0x0180 /* DMA Control 0 [R/W] */ -#define NILE4_DMASRCA0 0x0188 /* DMA Source Address 0 [R/W] */ -#define NILE4_DMADESA0 0x0190 /* DMA Destination Address 0 [R/W] */ -#define NILE4_DMACTRL1 0x0198 /* DMA Control 1 [R/W] */ -#define NILE4_DMASRCA1 0x01A0 /* DMA Source Address 1 [R/W] */ -#define NILE4_DMADESA1 0x01A8 /* DMA Destination Address 1 [R/W] */ - - - /* - * Timer Registers - */ - -#define NILE4_T0CTRL 0x01C0 /* SDRAM Refresh Control [R/W] */ -#define NILE4_T0CNTR 0x01C8 /* SDRAM Refresh Counter [R/W] */ -#define NILE4_T1CTRL 0x01D0 /* CPU-Bus Read Time-Out Control [R/W] */ -#define NILE4_T1CNTR 0x01D8 /* CPU-Bus Read Time-Out Counter [R/W] */ -#define NILE4_T2CTRL 0x01E0 /* General-Purpose Timer Control [R/W] */ -#define NILE4_T2CNTR 0x01E8 /* General-Purpose Timer Counter [R/W] */ -#define NILE4_T3CTRL 0x01F0 /* Watchdog Timer Control [R/W] */ -#define NILE4_T3CNTR 0x01F8 /* Watchdog Timer Counter [R/W] */ - - - /* - * PCI Configuration Space Registers - */ - -#define NILE4_PCI_BASE 0x0200 - -#define NILE4_VID 0x0200 /* PCI Vendor ID [R] */ -#define NILE4_DID 0x0202 /* PCI Device ID [R] */ -#define NILE4_PCICMD 0x0204 /* PCI Command [R/W] */ -#define NILE4_PCISTS 0x0206 /* PCI Status [R/W] */ -#define NILE4_REVID 0x0208 /* PCI Revision ID [R] */ -#define NILE4_CLASS 0x0209 /* PCI Class Code [R] */ -#define NILE4_CLSIZ 0x020C /* PCI Cache Line Size [R/W] */ -#define NILE4_MLTIM 0x020D /* PCI Latency Timer [R/W] */ -#define NILE4_HTYPE 0x020E /* PCI Header Type [R] */ -#define NILE4_BIST 0x020F /* BIST [R] (unimplemented) */ -#define NILE4_BARC 0x0210 /* PCI Base Address Register Control [R/W] */ -#define NILE4_BAR0 0x0218 /* PCI Base Address Register 0 [R/W] */ -#define NILE4_BAR1 0x0220 /* PCI Base Address Register 1 [R/W] */ -#define NILE4_CIS 0x0228 /* PCI Cardbus CIS Pointer [R] */ - /* (unimplemented) */ -#define NILE4_SSVID 0x022C /* PCI Sub-System Vendor ID [R/W] */ -#define NILE4_SSID 0x022E /* PCI Sub-System ID [R/W] */ -#define NILE4_ROM 0x0230 /* Expansion ROM Base Address [R] */ - /* 
(unimplemented) */ -#define NILE4_INTLIN 0x023C /* PCI Interrupt Line [R/W] */ -#define NILE4_INTPIN 0x023D /* PCI Interrupt Pin [R] */ -#define NILE4_MINGNT 0x023E /* PCI Min_Gnt [R] (unimplemented) */ -#define NILE4_MAXLAT 0x023F /* PCI Max_Lat [R] (unimplemented) */ -#define NILE4_BAR2 0x0240 /* PCI Base Address Register 2 [R/W] */ -#define NILE4_BAR3 0x0248 /* PCI Base Address Register 3 [R/W] */ -#define NILE4_BAR4 0x0250 /* PCI Base Address Register 4 [R/W] */ -#define NILE4_BAR5 0x0258 /* PCI Base Address Register 5 [R/W] */ -#define NILE4_BAR6 0x0260 /* PCI Base Address Register 6 [R/W] */ -#define NILE4_BAR7 0x0268 /* PCI Base Address Register 7 [R/W] */ -#define NILE4_BAR8 0x0270 /* PCI Base Address Register 8 [R/W] */ -#define NILE4_BARB 0x0278 /* PCI Base Address Register BOOT [R/W] */ - - - /* - * Serial-Port Registers - */ - -#define NILE4_UART_BASE 0x0300 - -#define NILE4_UARTRBR 0x0300 /* UART Receiver Data Buffer [R] */ -#define NILE4_UARTTHR 0x0300 /* UART Transmitter Data Holding [W] */ -#define NILE4_UARTIER 0x0308 /* UART Interrupt Enable [R/W] */ -#define NILE4_UARTDLL 0x0300 /* UART Divisor Latch LSB [R/W] */ -#define NILE4_UARTDLM 0x0308 /* UART Divisor Latch MSB [R/W] */ -#define NILE4_UARTIIR 0x0310 /* UART Interrupt ID [R] */ -#define NILE4_UARTFCR 0x0310 /* UART FIFO Control [W] */ -#define NILE4_UARTLCR 0x0318 /* UART Line Control [R/W] */ -#define NILE4_UARTMCR 0x0320 /* UART Modem Control [R/W] */ -#define NILE4_UARTLSR 0x0328 /* UART Line Status [R/W] */ -#define NILE4_UARTMSR 0x0330 /* UART Modem Status [R/W] */ -#define NILE4_UARTSCR 0x0338 /* UART Scratch [R/W] */ - -#define NILE4_UART_BASE_BAUD 520833 /* 100 MHz / 12 / 16 */ - - - /* - * Interrupt Lines - */ - -#define NILE4_INT_CPCE 0 /* CPU-Interface Parity-Error Interrupt */ -#define NILE4_INT_CNTD 1 /* CPU No-Target Decode Interrupt */ -#define NILE4_INT_MCE 2 /* Memory-Check Error Interrupt */ -#define NILE4_INT_DMA 3 /* DMA Controller Interrupt */ -#define NILE4_INT_UART 4 /* UART Interrupt */ -#define NILE4_INT_WDOG 5 /* Watchdog Timer Interrupt */ -#define NILE4_INT_GPT 6 /* General-Purpose Timer Interrupt */ -#define NILE4_INT_LBRTD 7 /* Local-Bus Ready Timer Interrupt */ -#define NILE4_INT_INTA 8 /* PCI Interrupt Signal INTA# */ -#define NILE4_INT_INTB 9 /* PCI Interrupt Signal INTB# */ -#define NILE4_INT_INTC 10 /* PCI Interrupt Signal INTC# */ -#define NILE4_INT_INTD 11 /* PCI Interrupt Signal INTD# */ -#define NILE4_INT_INTE 12 /* PCI Interrupt Signal INTE# (ISA cascade) */ -#define NILE4_INT_RESV 13 /* Reserved */ -#define NILE4_INT_PCIS 14 /* PCI SERR# Interrupt */ -#define NILE4_INT_PCIE 15 /* PCI Internal Error Interrupt */ - - - /* - * Nile 4 Register Access - */ - -static inline void nile4_sync(void) -{ - volatile u32 *p = (volatile u32 *)0xbfc00000; - (void)(*p); -} - -static inline void nile4_out32(u32 offset, u32 val) -{ - *(volatile u32 *)(NILE4_BASE+offset) = val; - nile4_sync(); -} - -static inline u32 nile4_in32(u32 offset) -{ - u32 val = *(volatile u32 *)(NILE4_BASE+offset); - nile4_sync(); - return val; -} - -static inline void nile4_out16(u32 offset, u16 val) -{ - *(volatile u16 *)(NILE4_BASE+offset) = val; - nile4_sync(); -} - -static inline u16 nile4_in16(u32 offset) -{ - u16 val = *(volatile u16 *)(NILE4_BASE+offset); - nile4_sync(); - return val; -} - -static inline void nile4_out8(u32 offset, u8 val) -{ - *(volatile u8 *)(NILE4_BASE+offset) = val; - nile4_sync(); -} - -static inline u8 nile4_in8(u32 offset) -{ - u8 val = *(volatile u8 *)(NILE4_BASE+offset); - 
nile4_sync(); - return val; -} - - - /* - * Physical Device Address Registers - */ - -extern void nile4_set_pdar(u32 pdar, u32 phys, u32 size, int width, - int on_memory_bus, int visible); - - - /* - * PCI Master Registers - */ - -#define NILE4_PCICMD_IACK 0 /* PCI Interrupt Acknowledge */ -#define NILE4_PCICMD_IO 1 /* PCI I/O Space */ -#define NILE4_PCICMD_MEM 3 /* PCI Memory Space */ -#define NILE4_PCICMD_CFG 5 /* PCI Configuration Space */ - - - /* - * PCI Address Spaces - * - * Note that these are multiplexed using PCIINIT[01]! - */ - -#define NILE4_PCI_IO_BASE 0xa6000000 -#define NILE4_PCI_MEM_BASE 0xa8000000 -#define NILE4_PCI_CFG_BASE NILE4_PCI_MEM_BASE -#define NILE4_PCI_IACK_BASE NILE4_PCI_IO_BASE - - -extern void nile4_set_pmr(u32 pmr, u32 type, u32 addr); - - - /* - * Interrupt Programming - */ - -#define NUM_I8259_INTERRUPTS 16 -#define NUM_NILE4_INTERRUPTS 16 - -#define IRQ_I8259_CASCADE NILE4_INT_INTE -#define is_i8259_irq(irq) ((irq) < NUM_I8259_INTERRUPTS) -#define nile4_to_irq(n) ((n)+NUM_I8259_INTERRUPTS) -#define irq_to_nile4(n) ((n)-NUM_I8259_INTERRUPTS) - -extern void nile4_map_irq(int nile4_irq, int cpu_irq); -extern void nile4_map_irq_all(int cpu_irq); -extern void nile4_enable_irq(unsigned int nile4_irq); -extern void nile4_disable_irq(unsigned int nile4_irq); -extern void nile4_disable_irq_all(void); -extern u16 nile4_get_irq_stat(int cpu_irq); -extern void nile4_enable_irq_output(int cpu_irq); -extern void nile4_disable_irq_output(int cpu_irq); -extern void nile4_set_pci_irq_polarity(int pci_irq, int high); -extern void nile4_set_pci_irq_level_or_edge(int pci_irq, int level); -extern void nile4_clear_irq(int nile4_irq); -extern void nile4_clear_irq_mask(u32 mask); -extern u8 nile4_i8259_iack(void); -extern void nile4_dump_irq_status(void); /* Debug */ - -#endif diff --git a/arch/mips/include/asm/octeon/cvmx-sli-defs.h b/arch/mips/include/asm/octeon/cvmx-sli-defs.h index cbc7cdae1c6a..5ef6c38150f5 100644 --- a/arch/mips/include/asm/octeon/cvmx-sli-defs.h +++ b/arch/mips/include/asm/octeon/cvmx-sli-defs.h @@ -46,7 +46,7 @@ static inline uint64_t CVMX_SLI_PCIE_MSI_RCV_FUNC(void) case OCTEON_CN78XX & OCTEON_FAMILY_MASK: if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X)) return 0x0000000000003CB0ull; - /* Else, fall through */ + fallthrough; default: return 0x0000000000023CB0ull; } diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index e2f503fc7a84..6a77bc4a6eec 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -49,7 +49,7 @@ static inline unsigned int page_size_ftlb(unsigned int mmuextdef) return 6; if (PAGE_SIZE > (256 << 10)) return 7; /* reserved */ - /* fall through */ + fallthrough; case MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT: return (PAGE_SHIFT - 10) / 2; default: diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index f92716cfa4f4..ee5dc0c145b9 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -172,6 +172,8 @@ extern pte_t invalid_pte_table[PTRS_PER_PTE]; +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + #ifndef __PAGETABLE_PUD_FOLDED /* * For 4-level pagetables we defines these ourselves, for 3-level the @@ -210,8 +212,6 @@ static inline void p4d_clear(p4d_t *p4dp) p4d_val(*p4dp) = (unsigned long)invalid_pud_table; } -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) - static inline unsigned long p4d_page_vaddr(p4d_t p4d) { return p4d_val(p4d); diff --git 
a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h index 4da79b85c179..e26dc41a8a68 100644 --- a/arch/mips/include/asm/pgtable-bits.h +++ b/arch/mips/include/asm/pgtable-bits.h @@ -55,6 +55,9 @@ enum pgtable_bits { #if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) _PAGE_SPECIAL_SHIFT, #endif +#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY) + _PAGE_SOFT_DIRTY_SHIFT, +#endif }; /* @@ -84,6 +87,9 @@ enum pgtable_bits { #if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) _PAGE_SPECIAL_SHIFT, #endif +#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY) + _PAGE_SOFT_DIRTY_SHIFT, +#endif }; #elif defined(CONFIG_CPU_R3K_TLB) @@ -99,6 +105,9 @@ enum pgtable_bits { #if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) _PAGE_SPECIAL_SHIFT, #endif +#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY) + _PAGE_SOFT_DIRTY_SHIFT, +#endif /* Used by TLB hardware (placed in EntryLo) */ _PAGE_GLOBAL_SHIFT = 8, @@ -125,7 +134,9 @@ enum pgtable_bits { #if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) _PAGE_SPECIAL_SHIFT, #endif - +#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY) + _PAGE_SOFT_DIRTY_SHIFT, +#endif /* Used by TLB hardware (placed in EntryLo*) */ #if defined(CONFIG_CPU_HAS_RIXI) _PAGE_NO_EXEC_SHIFT, @@ -152,6 +163,11 @@ enum pgtable_bits { #else # define _PAGE_SPECIAL 0 #endif +#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY) +# define _PAGE_SOFT_DIRTY (1 << _PAGE_SOFT_DIRTY_SHIFT) +#else +# define _PAGE_SOFT_DIRTY 0 +#endif /* Used by TLB hardware (placed in EntryLo*) */ #if defined(CONFIG_XPA) @@ -269,6 +285,6 @@ static inline uint64_t pte_to_entrylo(unsigned long pte_val) #define __WRITEABLE (_PAGE_SILENT_WRITE | _PAGE_WRITE | _PAGE_MODIFIED) #define _PAGE_CHG_MASK (_PAGE_ACCESSED | _PAGE_MODIFIED | \ - _PFN_MASK | _CACHE_MASK) + _PAGE_SOFT_DIRTY | _PFN_MASK | _CACHE_MASK) #endif /* _ASM_PGTABLE_BITS_H */ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index bfbab6652e20..32760b41aa31 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -400,7 +400,7 @@ static inline pte_t pte_mkwrite(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= _PAGE_MODIFIED; + pte_val(pte) |= _PAGE_MODIFIED | _PAGE_SOFT_DIRTY; if (pte_val(pte) & _PAGE_WRITE) pte_val(pte) |= _PAGE_SILENT_WRITE; return pte; @@ -414,6 +414,8 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } +#define pte_sw_mkyoung pte_mkyoung + #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; } @@ -423,6 +425,30 @@ static inline pte_t pte_mkhuge(pte_t pte) return pte; } #endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */ + +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline bool pte_soft_dirty(pte_t pte) +{ + return pte_val(pte) & _PAGE_SOFT_DIRTY; +} +#define pte_swp_soft_dirty pte_soft_dirty + +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + pte_val(pte) |= _PAGE_SOFT_DIRTY; + return pte; +} +#define pte_swp_mksoft_dirty pte_mksoft_dirty + +static inline pte_t pte_clear_soft_dirty(pte_t pte) +{ + pte_val(pte) &= ~(_PAGE_SOFT_DIRTY); + return pte; +} +#define pte_swp_clear_soft_dirty pte_clear_soft_dirty + +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + #endif /* @@ -454,6 +480,31 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) return __pgprot(prot); } +static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, + unsigned long address) +{ +} + +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t pte_a, pte_t pte_b) +{ + return pte_val(pte_a) == pte_val(pte_b); +} + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +static inline int 
ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) +{ + if (!pte_same(*ptep, entry)) + set_pte_at(vma->vm_mm, address, ptep, entry); + /* + * update_mmu_cache will unconditionally execute, handling both + * the case that the PTE changed and the spurious fault case. + */ + return true; +} + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. @@ -481,8 +532,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #else static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | - (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); + pte_val(pte) &= _PAGE_CHG_MASK; + pte_val(pte) |= pgprot_val(newprot) & ~_PAGE_CHG_MASK; + if ((pte_val(pte) & _PAGE_ACCESSED) && !(pte_val(pte) & _PAGE_NO_READ)) + pte_val(pte) |= _PAGE_SILENT_READ; + return pte; } #endif @@ -497,6 +551,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, __update_tlb(vma, address, pte); } +#define __HAVE_ARCH_UPDATE_MMU_TLB +#define update_mmu_tlb update_mmu_cache + static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -507,20 +564,17 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, #define kern_addr_valid(addr) (1) -#ifdef CONFIG_PHYS_ADDR_T_64BIT -extern int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot); - -static inline int io_remap_pfn_range(struct vm_area_struct *vma, - unsigned long vaddr, - unsigned long pfn, - unsigned long size, - pgprot_t prot) -{ - phys_addr_t phys_addr_high = fixup_bigphys_addr(pfn << PAGE_SHIFT, size); - return remap_pfn_range(vma, vaddr, phys_addr_high >> PAGE_SHIFT, size, prot); -} +/* + * Allow physical addresses to be fixed up to help 36-bit peripherals. 
+ */ +#ifdef CONFIG_MIPS_FIXUP_BIGPHYS_ADDR +phys_addr_t fixup_bigphys_addr(phys_addr_t addr, phys_addr_t size); +int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long vaddr, + unsigned long pfn, unsigned long size, pgprot_t prot); #define io_remap_pfn_range io_remap_pfn_range -#endif +#else +#define fixup_bigphys_addr(addr, size) (addr) +#endif /* CONFIG_MIPS_FIXUP_BIGPHYS_ADDR */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -579,7 +633,7 @@ static inline pmd_t pmd_mkclean(pmd_t pmd) static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= _PAGE_MODIFIED; + pmd_val(pmd) |= _PAGE_MODIFIED | _PAGE_SOFT_DIRTY; if (pmd_val(pmd) & _PAGE_WRITE) pmd_val(pmd) |= _PAGE_SILENT_WRITE; @@ -608,6 +662,26 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) return pmd; } +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline int pmd_soft_dirty(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_SOFT_DIRTY); +} + +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) +{ + pmd_val(pmd) |= _PAGE_SOFT_DIRTY; + return pmd; +} + +static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) +{ + pmd_val(pmd) &= ~(_PAGE_SOFT_DIRTY); + return pmd; +} + +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + /* Extern to avoid header file madness */ extern pmd_t mk_pmd(struct page *page, pgprot_t prot); diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 7990c1c70471..5d9ff61004ca 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu) { extern const struct plat_smp_ops *mp_ops; /* private */ - mp_ops->send_ipi_mask(cpumask_of(cpu), SMP_CALL_FUNCTION); + mp_ops->send_ipi_single(cpu, SMP_CALL_FUNCTION); } static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index 4d6ad907ae54..3e8d2aaf96af 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -424,7 +424,7 @@ .macro RESTORE_SP_AND_RET docfi=0 RESTORE_SP \docfi -#ifdef CONFIG_CPU_MIPSR6 +#if defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) eretnc #else .set push diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index 09cbe9042828..0b0a93bf83cd 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -67,11 +67,11 @@ do { \ #endif /* - * Clear LLBit during context switches on MIPSr6 such that eretnc can be used + * Clear LLBit during context switches on MIPSr5+ such that eretnc can be used * unconditionally when returning to userland in entry.S. 
*/ -#define __clear_r6_hw_ll_bit() do { \ - if (cpu_has_mips_r6) \ +#define __clear_r5_hw_ll_bit() do { \ + if (cpu_has_mips_r5 || cpu_has_mips_r6) \ write_c0_lladdr(0); \ } while (0) @@ -129,7 +129,7 @@ do { \ } \ clear_c0_status(ST0_CU2); \ } \ - __clear_r6_hw_ll_bit(); \ + __clear_r5_hw_ll_bit(); \ __clear_software_ll_bit(); \ if (cpu_has_userlocal) \ write_c0_userlocal(task_thread_info(next)->tp_value); \ diff --git a/arch/mips/include/asm/unaligned-emul.h b/arch/mips/include/asm/unaligned-emul.h new file mode 100644 index 000000000000..2022b18944b9 --- /dev/null +++ b/arch/mips/include/asm/unaligned-emul.h @@ -0,0 +1,779 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_MIPS_UNALIGNED_EMUL_H +#define _ASM_MIPS_UNALIGNED_EMUL_H + +#include <asm/asm.h> + +#ifdef __BIG_ENDIAN +#define _LoadHW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ (".set\tnoat\n" \ + "1:\t"type##_lb("%0", "0(%2)")"\n" \ + "2:\t"type##_lbu("$1", "1(%2)")"\n\t"\ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + "3:\t.set\tat\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR +#define _LoadW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + "1:\t"type##_lwl("%0", "(%2)")"\n" \ + "2:\t"type##_lwr("%0", "3(%2)")"\n\t"\ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ +/* For CPUs without lwl instruction */ +#define _LoadW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n" \ + ".set\tnoat\n\t" \ + "1:"type##_lb("%0", "0(%2)")"\n\t" \ + "2:"type##_lbu("$1", "1(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "3:"type##_lbu("$1", "2(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "4:"type##_lbu("$1", "3(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + ".set\tpop\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%1, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ + +#define _LoadHWU(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tnoat\n" \ + "1:\t"type##_lbu("%0", "0(%2)")"\n" \ + "2:\t"type##_lbu("$1", "1(%2)")"\n\t"\ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".set\tat\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR +#define _LoadWU(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + "1:\t"type##_lwl("%0", "(%2)")"\n" \ + 
"2:\t"type##_lwr("%0", "3(%2)")"\n\t"\ + "dsll\t%0, %0, 32\n\t" \ + "dsrl\t%0, %0, 32\n\t" \ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + "\t.section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _LoadDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tldl\t%0, (%2)\n" \ + "2:\tldr\t%0, 7(%2)\n\t" \ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + "\t.section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ +/* For CPUs without lwl and ldl instructions */ +#define _LoadWU(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:"type##_lbu("%0", "0(%2)")"\n\t" \ + "2:"type##_lbu("$1", "1(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "3:"type##_lbu("$1", "2(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "4:"type##_lbu("$1", "3(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + ".set\tpop\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%1, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _LoadDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:lb\t%0, 0(%2)\n\t" \ + "2:lbu\t $1, 1(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "3:lbu\t$1, 2(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "4:lbu\t$1, 3(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "5:lbu\t$1, 4(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "6:lbu\t$1, 5(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "7:lbu\t$1, 6(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "8:lbu\t$1, 7(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + ".set\tpop\n\t" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%1, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR)"\t5b, 11b\n\t" \ + STR(PTR)"\t6b, 11b\n\t" \ + STR(PTR)"\t7b, 11b\n\t" \ + STR(PTR)"\t8b, 11b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ + + +#define _StoreHW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tnoat\n" \ + "1:\t"type##_sb("%1", "1(%2)")"\n" \ + "srl\t$1, %1, 0x8\n" \ + "2:\t"type##_sb("$1", "0(%2)")"\n" \ + ".set\tat\n\t" \ + "li\t%0, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%0, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT));\ +} while (0) 
+ +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR +#define _StoreW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + "1:\t"type##_swl("%1", "(%2)")"\n" \ + "2:\t"type##_swr("%1", "3(%2)")"\n\t"\ + "li\t%0, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%0, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _StoreDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tsdl\t%1,(%2)\n" \ + "2:\tsdr\t%1, 7(%2)\n\t" \ + "li\t%0, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%0, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ +#define _StoreW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:"type##_sb("%1", "3(%2)")"\n\t" \ + "srl\t$1, %1, 0x8\n\t" \ + "2:"type##_sb("$1", "2(%2)")"\n\t" \ + "srl\t$1, $1, 0x8\n\t" \ + "3:"type##_sb("$1", "1(%2)")"\n\t" \ + "srl\t$1, $1, 0x8\n\t" \ + "4:"type##_sb("$1", "0(%2)")"\n\t" \ + ".set\tpop\n\t" \ + "li\t%0, 0\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%0, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + ".previous" \ + : "=&r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT) \ + : "memory"); \ +} while (0) + +#define _StoreDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:sb\t%1, 7(%2)\n\t" \ + "dsrl\t$1, %1, 0x8\n\t" \ + "2:sb\t$1, 6(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "3:sb\t$1, 5(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "4:sb\t$1, 4(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "5:sb\t$1, 3(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "6:sb\t$1, 2(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "7:sb\t$1, 1(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "8:sb\t$1, 0(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + ".set\tpop\n\t" \ + "li\t%0, 0\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%0, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR)"\t5b, 11b\n\t" \ + STR(PTR)"\t6b, 11b\n\t" \ + STR(PTR)"\t7b, 11b\n\t" \ + STR(PTR)"\t8b, 11b\n\t" \ + ".previous" \ + : "=&r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT) \ + : "memory"); \ +} while (0) + +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ + +#else /* __BIG_ENDIAN */ + +#define _LoadHW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ (".set\tnoat\n" \ + "1:\t"type##_lb("%0", "1(%2)")"\n" \ + "2:\t"type##_lbu("$1", "0(%2)")"\n\t"\ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + "3:\t.set\tat\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR 
+#define _LoadW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + "1:\t"type##_lwl("%0", "3(%2)")"\n" \ + "2:\t"type##_lwr("%0", "(%2)")"\n\t"\ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ +/* For CPUs without lwl instruction */ +#define _LoadW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n" \ + ".set\tnoat\n\t" \ + "1:"type##_lb("%0", "3(%2)")"\n\t" \ + "2:"type##_lbu("$1", "2(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "3:"type##_lbu("$1", "1(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "4:"type##_lbu("$1", "0(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + ".set\tpop\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%1, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ + + +#define _LoadHWU(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tnoat\n" \ + "1:\t"type##_lbu("%0", "1(%2)")"\n" \ + "2:\t"type##_lbu("$1", "0(%2)")"\n\t"\ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".set\tat\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR +#define _LoadWU(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + "1:\t"type##_lwl("%0", "3(%2)")"\n" \ + "2:\t"type##_lwr("%0", "(%2)")"\n\t"\ + "dsll\t%0, %0, 32\n\t" \ + "dsrl\t%0, %0, 32\n\t" \ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + "\t.section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _LoadDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tldl\t%0, 7(%2)\n" \ + "2:\tldr\t%0, (%2)\n\t" \ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + "\t.section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ +/* For CPUs without lwl and ldl instructions */ +#define _LoadWU(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:"type##_lbu("%0", "3(%2)")"\n\t" \ + "2:"type##_lbu("$1", "2(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "3:"type##_lbu("$1", "1(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "4:"type##_lbu("$1", "0(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + 
"li\t%1, 0\n" \ + ".set\tpop\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%1, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _LoadDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:lb\t%0, 7(%2)\n\t" \ + "2:lbu\t$1, 6(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "3:lbu\t$1, 5(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "4:lbu\t$1, 4(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "5:lbu\t$1, 3(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "6:lbu\t$1, 2(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "7:lbu\t$1, 1(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "8:lbu\t$1, 0(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + ".set\tpop\n\t" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%1, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR)"\t5b, 11b\n\t" \ + STR(PTR)"\t6b, 11b\n\t" \ + STR(PTR)"\t7b, 11b\n\t" \ + STR(PTR)"\t8b, 11b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ + +#define _StoreHW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tnoat\n" \ + "1:\t"type##_sb("%1", "0(%2)")"\n" \ + "srl\t$1,%1, 0x8\n" \ + "2:\t"type##_sb("$1", "1(%2)")"\n" \ + ".set\tat\n\t" \ + "li\t%0, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%0, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT));\ +} while (0) + +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR +#define _StoreW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + "1:\t"type##_swl("%1", "3(%2)")"\n" \ + "2:\t"type##_swr("%1", "(%2)")"\n\t"\ + "li\t%0, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%0, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _StoreDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tsdl\t%1, 7(%2)\n" \ + "2:\tsdr\t%1, (%2)\n\t" \ + "li\t%0, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%0, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ +/* For CPUs without swl and sdl instructions */ +#define _StoreW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:"type##_sb("%1", "0(%2)")"\n\t" \ + "srl\t$1, %1, 0x8\n\t" \ + "2:"type##_sb("$1", "1(%2)")"\n\t" \ + "srl\t$1, $1, 0x8\n\t" \ + "3:"type##_sb("$1", "2(%2)")"\n\t" \ + "srl\t$1, $1, 0x8\n\t" \ + 
"4:"type##_sb("$1", "3(%2)")"\n\t" \ + ".set\tpop\n\t" \ + "li\t%0, 0\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%0, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + ".previous" \ + : "=&r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT) \ + : "memory"); \ +} while (0) + +#define _StoreDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:sb\t%1, 0(%2)\n\t" \ + "dsrl\t$1, %1, 0x8\n\t" \ + "2:sb\t$1, 1(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "3:sb\t$1, 2(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "4:sb\t$1, 3(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "5:sb\t$1, 4(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "6:sb\t$1, 5(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "7:sb\t$1, 6(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "8:sb\t$1, 7(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + ".set\tpop\n\t" \ + "li\t%0, 0\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%0, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR)"\t5b, 11b\n\t" \ + STR(PTR)"\t6b, 11b\n\t" \ + STR(PTR)"\t7b, 11b\n\t" \ + STR(PTR)"\t8b, 11b\n\t" \ + ".previous" \ + : "=&r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT) \ + : "memory"); \ +} while (0) + +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ +#endif + +#define LoadHWU(addr, value, res) _LoadHWU(addr, value, res, kernel) +#define LoadHWUE(addr, value, res) _LoadHWU(addr, value, res, user) +#define LoadWU(addr, value, res) _LoadWU(addr, value, res, kernel) +#define LoadWUE(addr, value, res) _LoadWU(addr, value, res, user) +#define LoadHW(addr, value, res) _LoadHW(addr, value, res, kernel) +#define LoadHWE(addr, value, res) _LoadHW(addr, value, res, user) +#define LoadW(addr, value, res) _LoadW(addr, value, res, kernel) +#define LoadWE(addr, value, res) _LoadW(addr, value, res, user) +#define LoadDW(addr, value, res) _LoadDW(addr, value, res) + +#define StoreHW(addr, value, res) _StoreHW(addr, value, res, kernel) +#define StoreHWE(addr, value, res) _StoreHW(addr, value, res, user) +#define StoreW(addr, value, res) _StoreW(addr, value, res, kernel) +#define StoreWE(addr, value, res) _StoreW(addr, value, res, user) +#define StoreDW(addr, value, res) _StoreDW(addr, value, res) + +#endif /* _ASM_MIPS_UNALIGNED_EMUL_H */ diff --git a/arch/mips/include/asm/vermagic.h b/arch/mips/include/asm/vermagic.h index 24dc3d35161c..4d2dae0c7c57 100644 --- a/arch/mips/include/asm/vermagic.h +++ b/arch/mips/include/asm/vermagic.h @@ -8,12 +8,16 @@ #define MODULE_PROC_FAMILY "MIPS32_R1 " #elif defined CONFIG_CPU_MIPS32_R2 #define MODULE_PROC_FAMILY "MIPS32_R2 " +#elif defined CONFIG_CPU_MIPS32_R5 +#define MODULE_PROC_FAMILY "MIPS32_R5 " #elif defined CONFIG_CPU_MIPS32_R6 #define MODULE_PROC_FAMILY "MIPS32_R6 " #elif defined CONFIG_CPU_MIPS64_R1 #define MODULE_PROC_FAMILY "MIPS64_R1 " #elif defined CONFIG_CPU_MIPS64_R2 #define MODULE_PROC_FAMILY "MIPS64_R2 " +#elif defined CONFIG_CPU_MIPS64_R5 +#define MODULE_PROC_FAMILY "MIPS64_R5 " #elif defined CONFIG_CPU_MIPS64_R6 #define MODULE_PROC_FAMILY "MIPS64_R6 " #elif defined CONFIG_CPU_R3000 @@ -46,6 +50,8 @@ #define MODULE_PROC_FAMILY "LOONGSON64 " #elif defined CONFIG_CPU_CAVIUM_OCTEON #define MODULE_PROC_FAMILY "OCTEON " +#elif defined 
CONFIG_CPU_P5600 +#define MODULE_PROC_FAMILY "P5600 " #elif defined CONFIG_CPU_XLR #define MODULE_PROC_FAMILY "XLR " #elif defined CONFIG_CPU_XLP diff --git a/arch/mips/include/uapi/asm/hwcap.h b/arch/mips/include/uapi/asm/hwcap.h index 1ade1daa4921..b7e02bdc1985 100644 --- a/arch/mips/include/uapi/asm/hwcap.h +++ b/arch/mips/include/uapi/asm/hwcap.h @@ -17,5 +17,6 @@ #define HWCAP_LOONGSON_MMI (1 << 11) #define HWCAP_LOONGSON_EXT (1 << 12) #define HWCAP_LOONGSON_EXT2 (1 << 13) +#define HWCAP_LOONGSON_CPUCFG (1 << 14) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index eaa3a80affdf..98f97c85e059 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h @@ -989,6 +989,30 @@ struct mm16_r5_format { /* Load/store from stack pointer format */ }; /* + * Loongson-3 overridden COP2 instruction formats (32-bit length) + */ +struct loongson3_lswc2_format { /* Loongson-3 overridden lwc2/swc2 Load/Store format */ + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int base : 5, + __BITFIELD_FIELD(unsigned int rt : 5, + __BITFIELD_FIELD(unsigned int fr : 1, + __BITFIELD_FIELD(unsigned int offset : 9, + __BITFIELD_FIELD(unsigned int ls : 1, + __BITFIELD_FIELD(unsigned int rq : 5, + ;))))))) +}; + +struct loongson3_lsdc2_format { /* Loongson-3 overridden ldc2/sdc2 Load/Store format */ + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int base : 5, + __BITFIELD_FIELD(unsigned int rt : 5, + __BITFIELD_FIELD(unsigned int index : 5, + __BITFIELD_FIELD(unsigned int offset : 8, + __BITFIELD_FIELD(unsigned int opcode1 : 3, + ;)))))) +}; + +/* * MIPS16e instruction formats (16-bit length) */ struct m16e_rr { @@ -1088,6 +1112,8 @@ union mips_instruction { struct mm16_rb_format mm16_rb_format; struct mm16_r3_format mm16_r3_format; struct mm16_r5_format mm16_r5_format; + struct loongson3_lswc2_format loongson3_lswc2_format; + struct loongson3_lsdc2_format loongson3_lsdc2_format; }; union mips16e_instruction { diff --git a/arch/mips/jazz/Platform b/arch/mips/jazz/Platform index 3373788acca1..eb0490ae8b09 100644 --- a/arch/mips/jazz/Platform +++ b/arch/mips/jazz/Platform @@ -1,6 +1,5 @@ # # Acer PICA 61, Mips Magnum 4000 and Olivetti M700. # -platform-$(CONFIG_MACH_JAZZ) += jazz/ cflags-$(CONFIG_MACH_JAZZ) += -I$(srctree)/arch/mips/include/asm/mach-jazz load-$(CONFIG_MACH_JAZZ) += 0xffffffff80080000 diff --git a/arch/mips/jz4740/Kconfig b/arch/mips/jz4740/Kconfig index 412d2faa3cdf..9c2e8c15bb97 100644 --- a/arch/mips/jz4740/Kconfig +++ b/arch/mips/jz4740/Kconfig @@ -2,7 +2,14 @@ choice prompt "Machine type" depends on MACH_INGENIC - default JZ4740_QI_LB60 + default INGENIC_GENERIC_BOARD + +config INGENIC_GENERIC_BOARD + bool "Generic board" + select MACH_JZ4740 + select MACH_JZ4770 + select MACH_JZ4780 + select MACH_X1000 config JZ4740_QI_LB60 bool "Qi Hardware Ben NanoNote" diff --git a/arch/mips/jz4740/Makefile b/arch/mips/jz4740/Makefile index 6de14c0deb4e..f96c0f5eca44 100644 --- a/arch/mips/jz4740/Makefile +++ b/arch/mips/jz4740/Makefile @@ -4,11 +4,6 @@ # # Object file lists. 
- -obj-y += prom.o time.o reset.o setup.o timer.o +obj-y += setup.o CFLAGS_setup.o = -I$(src)/../../../scripts/dtc/libfdt - -# PM support - -obj-$(CONFIG_PM) += pm.o diff --git a/arch/mips/jz4740/Platform b/arch/mips/jz4740/Platform index a2a5a85ea1f9..bd35d0621b13 100644 --- a/arch/mips/jz4740/Platform +++ b/arch/mips/jz4740/Platform @@ -1,4 +1,3 @@ -platform-$(CONFIG_MACH_INGENIC) += jz4740/ cflags-$(CONFIG_MACH_INGENIC) += -I$(srctree)/arch/mips/include/asm/mach-jz4740 load-$(CONFIG_MACH_INGENIC) += 0xffffffff80010000 zload-$(CONFIG_MACH_INGENIC) += 0xffffffff81000000 diff --git a/arch/mips/jz4740/pm.c b/arch/mips/jz4740/pm.c deleted file mode 100644 index f9b551f01f42..000000000000 --- a/arch/mips/jz4740/pm.c +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ4740 SoC power management support - */ - -#include <linux/init.h> -#include <linux/pm.h> -#include <linux/delay.h> -#include <linux/suspend.h> - -static int jz4740_pm_enter(suspend_state_t state) -{ - __asm__(".set\tmips3\n\t" - "wait\n\t" - ".set\tmips0"); - - - - return 0; -} - -static const struct platform_suspend_ops jz4740_pm_ops = { - .valid = suspend_valid_only_mem, - .enter = jz4740_pm_enter, -}; - -static int __init jz4740_pm_init(void) -{ - suspend_set_ops(&jz4740_pm_ops); - return 0; - -} -late_initcall(jz4740_pm_init); diff --git a/arch/mips/jz4740/prom.c b/arch/mips/jz4740/prom.c deleted file mode 100644 index ff4555c3fb15..000000000000 --- a/arch/mips/jz4740/prom.c +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ4740 SoC prom code - */ - -#include <linux/init.h> - -#include <asm/bootinfo.h> -#include <asm/fw/fw.h> - -void __init prom_init(void) -{ - fw_init_cmdline(); -} - -void __init prom_free_prom_memory(void) -{ -} diff --git a/arch/mips/jz4740/reset.c b/arch/mips/jz4740/reset.c deleted file mode 100644 index 1f9f02e54085..000000000000 --- a/arch/mips/jz4740/reset.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - */ - -#include <asm/reboot.h> - -#include "reset.h" - -static void jz4740_halt(void) -{ - while (1) { - __asm__(".set push;\n" - ".set mips3;\n" - "wait;\n" - ".set pop;\n" - ); - } -} - -void jz4740_reset_init(void) -{ - _machine_halt = jz4740_halt; -} diff --git a/arch/mips/jz4740/reset.h b/arch/mips/jz4740/reset.h deleted file mode 100644 index 4e8746ee9b61..000000000000 --- a/arch/mips/jz4740/reset.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __MIPS_JZ4740_RESET_H__ -#define __MIPS_JZ4740_RESET_H__ - -extern void jz4740_reset_init(void); - -#endif diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c index 880c26857aff..61468a87775c 100644 --- a/arch/mips/jz4740/setup.c +++ b/arch/mips/jz4740/setup.c @@ -5,17 +5,22 @@ * JZ4740 setup code */ +#include <linux/clocksource.h> #include <linux/init.h> #include <linux/io.h> #include <linux/irqchip.h> #include <linux/kernel.h> #include <linux/libfdt.h> +#include <linux/of_clk.h> #include <linux/of_fdt.h> +#include <linux/pm.h> +#include <linux/suspend.h> #include <asm/bootinfo.h> +#include <asm/fw/fw.h> #include <asm/prom.h> - -#include "reset.h" +#include <asm/reboot.h> +#include <asm/time.h> #define JZ4740_EMC_BASE_ADDR 0x13010000 @@ -61,8 +66,6 @@ void __init plat_mem_setup(void) int offset; void *dtb; - 
jz4740_reset_init(); - if (__dtb_start != __dtb_end) dtb = __dtb_start; else @@ -105,3 +108,56 @@ void __init arch_init_irq(void) { irqchip_init(); } + +void __init plat_time_init(void) +{ + of_clk_init(NULL); + timer_probe(); +} + +void __init prom_init(void) +{ + fw_init_cmdline(); +} + +void __init prom_free_prom_memory(void) +{ +} + +static void jz4740_wait_instr(void) +{ + __asm__(".set push;\n" + ".set mips3;\n" + "wait;\n" + ".set pop;\n" + ); +} + +static void jz4740_halt(void) +{ + for (;;) + jz4740_wait_instr(); +} + +static int __maybe_unused jz4740_pm_enter(suspend_state_t state) +{ + jz4740_wait_instr(); + + return 0; +} + +static const struct platform_suspend_ops jz4740_pm_ops __maybe_unused = { + .valid = suspend_valid_only_mem, + .enter = jz4740_pm_enter, +}; + +static int __init jz4740_pm_init(void) +{ + if (IS_ENABLED(CONFIG_PM_SLEEP)) + suspend_set_ops(&jz4740_pm_ops); + _machine_halt = jz4740_halt; + + return 0; + +} +late_initcall(jz4740_pm_init); diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c deleted file mode 100644 index 605a84a250bf..000000000000 --- a/arch/mips/jz4740/time.c +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ4740 platform time support - */ - -#include <linux/clocksource.h> -#include <linux/of_clk.h> - -#include <asm/mach-jz4740/timer.h> - -void __init plat_time_init(void) -{ - of_clk_init(NULL); - jz4740_timer_init(); - timer_probe(); -} diff --git a/arch/mips/jz4740/timer.c b/arch/mips/jz4740/timer.c deleted file mode 100644 index 5c9f82de6a82..000000000000 --- a/arch/mips/jz4740/timer.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ4740 platform timer support - */ - -#include <linux/export.h> -#include <linux/io.h> -#include <linux/init.h> -#include <linux/kernel.h> - -#include <asm/mach-jz4740/base.h> -#include <asm/mach-jz4740/timer.h> - -void __iomem *jz4740_timer_base; -EXPORT_SYMBOL_GPL(jz4740_timer_base); - -void jz4740_timer_enable_watchdog(void) -{ - writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_CLEAR); -} -EXPORT_SYMBOL_GPL(jz4740_timer_enable_watchdog); - -void jz4740_timer_disable_watchdog(void) -{ - writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_SET); -} -EXPORT_SYMBOL_GPL(jz4740_timer_disable_watchdog); - -void __init jz4740_timer_init(void) -{ - jz4740_timer_base = ioremap(JZ4740_TCU_BASE_ADDR, 0x100); - - if (!jz4740_timer_base) - panic("Failed to ioremap timer registers"); - - /* Disable all timer clocks except for those used as system timers */ - writel(0x000100fc, jz4740_timer_base + JZ_REG_TIMER_STOP_SET); - - /* Timer irqs are unmasked by default, mask them */ - writel(0x00ff00ff, jz4740_timer_base + JZ_REG_TIMER_MASK_SET); -} diff --git a/arch/mips/kernel/8250-platform.c b/arch/mips/kernel/8250-platform.c deleted file mode 100644 index 5c6b2ab1f56e..000000000000 --- a/arch/mips/kernel/8250-platform.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
- * - * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) - */ -#include <linux/init.h> -#include <linux/serial_8250.h> - -#define PORT(base, int) \ -{ \ - .iobase = base, \ - .irq = int, \ - .uartclk = 1843200, \ - .iotype = UPIO_PORT, \ - .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \ - .regshift = 0, \ -} - -static struct plat_serial8250_port uart8250_data[] = { - PORT(0x3F8, 4), - PORT(0x2F8, 3), - PORT(0x3E8, 4), - PORT(0x2E8, 3), - { }, -}; - -static struct platform_device uart8250_device = { - .name = "serial8250", - .id = PLAT8250_DEV_PLATFORM, - .dev = { - .platform_data = uart8250_data, - }, -}; - -static int __init uart8250_init(void) -{ - return platform_device_register(&uart8250_device); -} - -module_init(uart8250_init); - -MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Generic 8250 UART probe driver"); diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index d6e97df51cfb..8c7a043295ed 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -98,8 +98,6 @@ obj-$(CONFIG_MIPSR2_TO_R6_EMULATOR) += mips-r2-to-r6-emul.o CFLAGS_cpu-bugs64.o = $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -x c /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi) -obj-$(CONFIG_HAVE_STD_PC_SERIAL_PORT) += 8250-platform.o - obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_mipsxx.o diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 2c38f75d87ff..fb3e203698ea 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -90,7 +90,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + fallthrough; case mm_bltz_op: if ((long)regs->regs[insn.mm_i_format.rs] < 0) *contpc = regs->cp0_epc + @@ -106,7 +106,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + fallthrough; case mm_bgez_op: if ((long)regs->regs[insn.mm_i_format.rs] >= 0) *contpc = regs->cp0_epc + @@ -144,7 +144,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, unsigned int bit; bc_false = 1; - /* Fall through */ + fallthrough; case mm_bc2t_op: case mm_bc1t_op: preempt_disable(); @@ -178,7 +178,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case mm_jalrs16_op: regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + fallthrough; case mm_jr16_op: *contpc = regs->regs[insn.mm_i_format.rs]; return 1; @@ -239,7 +239,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case mm_jal32_op: regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + fallthrough; case mm_j32_op: *contpc = regs->cp0_epc + dec_insn.pc_inc; *contpc >>= 27; @@ -432,7 +432,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, switch (insn.r_format.func) { case jalr_op: regs->regs[insn.r_format.rd] = epc + 8; - /* Fall through */ + fallthrough; case jr_op: if (NO_R6EMU && insn.r_format.func == jr_op) goto sigill_r2r6; @@ -451,7 +451,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bltzl_op: if (NO_R6EMU) goto sigill_r2r6; - /* fall through */ + fallthrough; case bltz_op: if ((long)regs->regs[insn.i_format.rs] < 0) { epc = epc + 4 + (insn.i_format.simmediate << 
2); @@ -465,7 +465,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgezl_op: if (NO_R6EMU) goto sigill_r2r6; - /* fall through */ + fallthrough; case bgez_op: if ((long)regs->regs[insn.i_format.rs] >= 0) { epc = epc + 4 + (insn.i_format.simmediate << 2); @@ -561,7 +561,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case jalx_op: case jal_op: regs->regs[31] = regs->cp0_epc + 8; - /* fall through */ + fallthrough; case j_op: epc += 4; epc >>= 28; @@ -578,7 +578,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case beql_op: if (NO_R6EMU) goto sigill_r2r6; - /* fall through */ + fallthrough; case beq_op: if (regs->regs[insn.i_format.rs] == regs->regs[insn.i_format.rt]) { @@ -593,7 +593,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bnel_op: if (NO_R6EMU) goto sigill_r2r6; - /* fall through */ + fallthrough; case bne_op: if (regs->regs[insn.i_format.rs] != regs->regs[insn.i_format.rt]) { @@ -608,7 +608,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case blezl_op: /* not really i_format */ if (!insn.i_format.rt && NO_R6EMU) goto sigill_r2r6; - /* fall through */ + fallthrough; case blez_op: /* * Compact branches for R6 for the @@ -644,7 +644,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgtzl_op: if (!insn.i_format.rt && NO_R6EMU) goto sigill_r2r6; - /* fall through */ + fallthrough; case bgtz_op: /* * Compact branches for R6 for the diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index 17a9cbb8b3df..995ad9e69ded 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -8,6 +8,7 @@ */ #include <linux/clockchips.h> #include <linux/interrupt.h> +#include <linux/cpufreq.h> #include <linux/percpu.h> #include <linux/smp.h> #include <linux/irq.h> @@ -250,6 +251,49 @@ unsigned int __weak get_c0_compare_int(void) return MIPS_CPU_IRQ_BASE + cp0_compare_irq; } +#ifdef CONFIG_CPU_FREQ + +static unsigned long mips_ref_freq; + +static int r4k_cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + struct clock_event_device *cd; + unsigned long rate; + int cpu; + + if (!mips_ref_freq) + mips_ref_freq = freq->old; + + if (val == CPUFREQ_POSTCHANGE) { + rate = cpufreq_scale(mips_hpt_frequency, mips_ref_freq, + freq->new); + + for_each_cpu(cpu, freq->policy->cpus) { + cd = &per_cpu(mips_clockevent_device, cpu); + + clockevents_update_freq(cd, rate); + } + } + + return 0; +} + +static struct notifier_block r4k_cpufreq_notifier = { + .notifier_call = r4k_cpufreq_callback, +}; + +static int __init r4k_register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&r4k_cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); + +} +core_initcall(r4k_register_cpufreq_notifier); + +#endif /* !CONFIG_CPU_FREQ */ + int r4k_clockevent_init(void) { unsigned long flags = IRQF_PERCPU | IRQF_TIMER | IRQF_SHARED; diff --git a/arch/mips/kernel/cps-vec-ns16550.S b/arch/mips/kernel/cps-vec-ns16550.S index d5a67b4ce9f6..30725e1df987 100644 --- a/arch/mips/kernel/cps-vec-ns16550.S +++ b/arch/mips/kernel/cps-vec-ns16550.S @@ -14,16 +14,30 @@ #define UART_TX_OFS (UART_TX << CONFIG_MIPS_CPS_NS16550_SHIFT) #define UART_LSR_OFS (UART_LSR << CONFIG_MIPS_CPS_NS16550_SHIFT) +#if CONFIG_MIPS_CPS_NS16550_WIDTH == 1 +# define UART_L lb +# define UART_S sb +#elif CONFIG_MIPS_CPS_NS16550_WIDTH == 2 +# define UART_L lh +# define UART_S sh +#elif CONFIG_MIPS_CPS_NS16550_WIDTH == 4 +# define UART_L lw +# define UART_S sw +#else +# define UART_L lb 
+# define UART_S sb +#endif + /** * _mips_cps_putc() - write a character to the UART * @a0: ASCII character to write * @t9: UART base address */ LEAF(_mips_cps_putc) -1: lw t0, UART_LSR_OFS(t9) +1: UART_L t0, UART_LSR_OFS(t9) andi t0, t0, UART_LSR_TEMT beqz t0, 1b - sb a0, UART_TX_OFS(t9) + UART_S a0, UART_TX_OFS(t9) jr ra END(_mips_cps_putc) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index f21a2304401f..6b93162d7c5a 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -28,6 +28,8 @@ #include <asm/spram.h> #include <linux/uaccess.h> +#include <asm/mach-loongson64/cpucfg-emul.h> + /* Hardware capabilities */ unsigned int elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); @@ -92,6 +94,7 @@ static void cpu_set_fpu_2008(struct cpuinfo_mips *c) { if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { unsigned long sr, fir, fcsr, fcsr0, fcsr1; @@ -172,6 +175,7 @@ static void cpu_set_nofpu_2008(struct cpuinfo_mips *c) case STRICT: if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY; } else { @@ -263,9 +267,11 @@ static void cpu_set_nofpu_id(struct cpuinfo_mips *c) value = 0; if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) value |= MIPS_FPIR_D | MIPS_FPIR_S; if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) value |= MIPS_FPIR_F64 | MIPS_FPIR_L | MIPS_FPIR_W; if (c->options & MIPS_CPU_NAN_2008) @@ -286,6 +292,7 @@ static void cpu_set_fpu_opts(struct cpuinfo_mips *c) if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { if (c->fpu_id & MIPS_FPIR_3D) c->ases |= MIPS_ASE_MIPS3D; @@ -532,22 +539,26 @@ static inline void cpu_probe_vmbits(struct cpuinfo_mips *c) static void set_isa(struct cpuinfo_mips *c, unsigned int isa) { switch (isa) { + case MIPS_CPU_ISA_M64R5: + c->isa_level |= MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5; + set_elf_base_platform("mips64r5"); + fallthrough; case MIPS_CPU_ISA_M64R2: c->isa_level |= MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2; set_elf_base_platform("mips64r2"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_M64R1: c->isa_level |= MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1; set_elf_base_platform("mips64"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_V: c->isa_level |= MIPS_CPU_ISA_V; set_elf_base_platform("mips5"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_IV: c->isa_level |= MIPS_CPU_ISA_IV; set_elf_base_platform("mips4"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_III: c->isa_level |= MIPS_CPU_ISA_II | MIPS_CPU_ISA_III; set_elf_base_platform("mips3"); @@ -557,20 +568,24 @@ static void set_isa(struct cpuinfo_mips *c, unsigned int isa) case MIPS_CPU_ISA_M64R6: c->isa_level |= MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6; set_elf_base_platform("mips64r6"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_M32R6: c->isa_level |= MIPS_CPU_ISA_M32R6; set_elf_base_platform("mips32r6"); /* 
Break here so we don't add incompatible ISAs */ break; + case MIPS_CPU_ISA_M32R5: + c->isa_level |= MIPS_CPU_ISA_M32R5; + set_elf_base_platform("mips32r5"); + fallthrough; case MIPS_CPU_ISA_M32R2: c->isa_level |= MIPS_CPU_ISA_M32R2; set_elf_base_platform("mips32r2"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_M32R1: c->isa_level |= MIPS_CPU_ISA_M32R1; set_elf_base_platform("mips32"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_II: c->isa_level |= MIPS_CPU_ISA_II; set_elf_base_platform("mips2"); @@ -620,14 +635,14 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) config = read_c0_config6(); if (flags & FTLB_EN) - config |= MIPS_CONF6_FTLBEN; + config |= MIPS_CONF6_MTI_FTLBEN; else - config &= ~MIPS_CONF6_FTLBEN; + config &= ~MIPS_CONF6_MTI_FTLBEN; if (flags & FTLB_SET_PROB) { - config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT); + config &= ~(3 << MIPS_CONF6_MTI_FTLBP_SHIFT); config |= calculate_ftlb_probability(c) - << MIPS_CONF6_FTLBP_SHIFT; + << MIPS_CONF6_MTI_FTLBP_SHIFT; } write_c0_config6(config); @@ -647,10 +662,10 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) config = read_c0_config6(); if (flags & FTLB_EN) /* Enable FTLB */ - write_c0_config6(config & ~MIPS_CONF6_FTLBDIS); + write_c0_config6(config & ~MIPS_CONF6_LOONGSON_FTLBDIS); else /* Disable FTLB */ - write_c0_config6(config | MIPS_CONF6_FTLBDIS); + write_c0_config6(config | MIPS_CONF6_LOONGSON_FTLBDIS); break; default: return 1; @@ -659,6 +674,52 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) return 0; } +static int mm_config(struct cpuinfo_mips *c) +{ + unsigned int config0, update, mm; + + config0 = read_c0_config(); + mm = config0 & MIPS_CONF_MM; + + /* + * It's implementation dependent what type of write-merge is supported + * and whether it can be enabled/disabled. If it is settable lets make + * the merging allowed by default. Some platforms might have + * write-through caching unsupported. In this case just ignore the + * CP0.Config.MM bit field value. 
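As a minimal stand-alone illustration of the write-merge policy described in the comment above (the accessor, field position and values here are assumptions, not the kernel's read_c0_config()/write_c0_config() or MIPS_CONF_MM* constants), the logic reduces to a read-modify-write when the field is software-settable and a passive read otherwise:

/* Sketch only: CFG_MM_* positions are stand-ins for the real Config.MM field. */
#define CFG_MM_MASK   (3u << 17)	/* assumed location of Config.MM      */
#define CFG_MM_SYSAD  (1u << 17)	/* merging on the SysAD interface only */
#define CFG_MM_FULL   (2u << 17)	/* full write-merging                  */

static unsigned int pick_write_merge(unsigned int config, int settable)
{
	if (settable)	/* core allows software control: default to full merging */
		return (config & ~CFG_MM_MASK) | CFG_MM_FULL;
	return config;	/* read-only field: honour whatever the core reports */
}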
+ */ + switch (c->cputype) { + case CPU_24K: + case CPU_34K: + case CPU_74K: + case CPU_P5600: + case CPU_P6600: + c->options |= MIPS_CPU_MM_FULL; + update = MIPS_CONF_MM_FULL; + break; + case CPU_1004K: + case CPU_1074K: + case CPU_INTERAPTIV: + case CPU_PROAPTIV: + mm = 0; + fallthrough; + default: + update = 0; + break; + } + + if (update) { + config0 = (config0 & ~MIPS_CONF_MM) | update; + write_c0_config(config0); + } else if (mm == MIPS_CONF_MM_SYSAD) { + c->options |= MIPS_CPU_MM_SYSAD; + } else if (mm == MIPS_CONF_MM_FULL) { + c->options |= MIPS_CPU_MM_FULL; + } + + return 0; +} + static inline unsigned int decode_config0(struct cpuinfo_mips *c) { unsigned int config0; @@ -850,7 +911,7 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) MIPS_CONF4_VTLBSIZEEXT_SHIFT) * 0x40; c->tlbsize = c->tlbsizevtlb; ftlb_page = MIPS_CONF4_VFTLBPAGESIZE; - /* fall through */ + fallthrough; case MIPS_CONF4_MMUEXTDEF_FTLBSIZEEXT: if (mips_ftlb_disabled) break; @@ -1750,13 +1811,19 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) spram_config(); + mm_config(c); + switch (__get_cpu_type(c->cputype)) { + case CPU_M5150: + case CPU_P5600: + set_isa(c, MIPS_CPU_ISA_M32R5); + break; case CPU_I6500: c->options |= MIPS_CPU_SHARED_FTLB_ENTRIES; - /* fall-through */ + fallthrough; case CPU_I6400: c->options |= MIPS_CPU_SHARED_FTLB_RAM; - /* fall-through */ + fallthrough; default: break; } @@ -1932,10 +1999,53 @@ platform: } } +#ifdef CONFIG_CPU_LOONGSON64 +#include <loongson_regs.h> + +static inline void decode_cpucfg(struct cpuinfo_mips *c) +{ + u32 cfg1 = read_cpucfg(LOONGSON_CFG1); + u32 cfg2 = read_cpucfg(LOONGSON_CFG2); + u32 cfg3 = read_cpucfg(LOONGSON_CFG3); + + if (cfg1 & LOONGSON_CFG1_MMI) + c->ases |= MIPS_ASE_LOONGSON_MMI; + + if (cfg2 & LOONGSON_CFG2_LEXT1) + c->ases |= MIPS_ASE_LOONGSON_EXT; + + if (cfg2 & LOONGSON_CFG2_LEXT2) + c->ases |= MIPS_ASE_LOONGSON_EXT2; + + if (cfg2 & LOONGSON_CFG2_LSPW) + c->options |= MIPS_CPU_LDPTE; + + if (cfg3 & LOONGSON_CFG3_LCAMP) + c->ases |= MIPS_ASE_LOONGSON_CAM; +} + static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { + decode_configs(c); + switch (c->processor_id & PRID_IMP_MASK) { - case PRID_IMP_LOONGSON_64C: /* Loongson-2/3 */ + case PRID_IMP_LOONGSON_64R: /* Loongson-64 Reduced */ + switch (c->processor_id & PRID_REV_MASK) { + case PRID_REV_LOONGSON2K_R1_0: + case PRID_REV_LOONGSON2K_R1_1: + case PRID_REV_LOONGSON2K_R1_2: + case PRID_REV_LOONGSON2K_R1_3: + c->cputype = CPU_LOONGSON64; + __cpu_name[cpu] = "Loongson-2K"; + set_elf_platform(cpu, "gs264e"); + set_isa(c, MIPS_CPU_ISA_M64R2); + break; + } + c->writecombine = _CACHE_UNCACHED_ACCELERATED; + c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_EXT | + MIPS_ASE_LOONGSON_EXT2); + break; + case PRID_IMP_LOONGSON_64C: /* Loongson-3 Classic */ switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_LOONGSON3A_R2_0: case PRID_REV_LOONGSON3A_R2_1: @@ -1952,8 +2062,14 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) set_isa(c, MIPS_CPU_ISA_M64R2); break; } - - decode_configs(c); + /* + * Loongson-3 Classic did not implement MIPS standard TLBINV + * but implemented TLBINVF and EHINV. As currently we're only + * using these two features, enable MIPS_CPU_TLBINV as well. + * + * Also some early Loongson-3A2000 had wrong TLB type in Config + * register, we correct it here. 
+ */ c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE; c->writecombine = _CACHE_UNCACHED_ACCELERATED; c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | @@ -1964,17 +2080,17 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); - decode_configs(c); - c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE; + decode_cpucfg(c); c->writecombine = _CACHE_UNCACHED_ACCELERATED; - c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | - MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2); break; default: panic("Unknown Loongson Processor ID!"); break; } } +#else +static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { } +#endif static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) { @@ -2028,7 +2144,7 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) default: break; } - /* fall-through */ + fallthrough; case PRID_IMP_XBURST_REV2: c->cputype = CPU_XBURST; c->writecombine = _CACHE_UNCACHED_ACCELERATED; @@ -2286,6 +2402,13 @@ void cpu_probe(void) cpu_probe_vmbits(c); + /* Synthesize CPUCFG data if running on Loongson processors; + * no-op otherwise. + * + * This looks at previously probed features, so keep this at bottom. + */ + loongson3_cpucfg_synthesize_data(c); + #ifdef CONFIG_64BIT if (cpu == 0) __ua_limit = ~((1ull << cpu_vmbits) - 1); diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c index 437dda64fd7a..edc4afc080fa 100644 --- a/arch/mips/kernel/csrc-r4k.c +++ b/arch/mips/kernel/csrc-r4k.c @@ -6,6 +6,7 @@ * Copyright (C) 2007 by Ralf Baechle */ #include <linux/clocksource.h> +#include <linux/cpufreq.h> #include <linux/init.h> #include <linux/sched_clock.h> @@ -65,6 +66,45 @@ static bool rdhwr_count_usable(void) return false; } +#ifdef CONFIG_CPU_FREQ + +static bool __read_mostly r4k_clock_unstable; + +static void r4k_clocksource_unstable(char *reason) +{ + if (r4k_clock_unstable) + return; + + r4k_clock_unstable = true; + + pr_info("R4K timer is unstable due to %s\n", reason); + + clocksource_mark_unstable(&clocksource_mips); +} + +static int r4k_cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + if (val == CPUFREQ_POSTCHANGE) + r4k_clocksource_unstable("CPU frequency change"); + + return 0; +} + +static struct notifier_block r4k_cpufreq_notifier = { + .notifier_call = r4k_cpufreq_callback, +}; + +static int __init r4k_register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&r4k_cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); + +} +core_initcall(r4k_register_cpufreq_notifier); + +#endif /* !CONFIG_CPU_FREQ */ + int __init init_r4k_clocksource(void) { if (!cpu_has_counter || !mips_hpt_frequency) diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index 4849a48afc0f..4b896f5023ff 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -169,8 +169,8 @@ syscall_exit_work: jal syscall_trace_leave b resume_userspace -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) || \ - defined(CONFIG_MIPS_MT) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_MIPSR6) || defined(CONFIG_MIPS_MT) /* * MIPS32R2 Instruction Hazard Barrier - must be called @@ -183,4 +183,4 @@ LEAF(mips_ihb) nop END(mips_ihb) -#endif /* CONFIG_CPU_MIPSR2 or CONFIG_CPU_MIPSR6 or CONFIG_MIPS_MT */ +#endif /* CONFIG_CPU_MIPSR2 - CONFIG_CPU_MIPSR6 or 
CONFIG_MIPS_MT */ diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 0a43c9125267..a1b966f3578e 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -476,20 +476,20 @@ NESTED(nmi_handler, PT_SIZE, sp) .endm .macro __build_clear_fpe + CLI + TRACE_IRQS_OFF .set push /* gas fails to assemble cfc1 for some archs (octeon).*/ \ .set mips1 SET_HARDFLOAT cfc1 a1, fcr31 .set pop - CLI - TRACE_IRQS_OFF .endm .macro __build_clear_msa_fpe - _cfcmsa a1, MSA_CSR CLI TRACE_IRQS_OFF + _cfcmsa a1, MSA_CSR .endm .macro __build_clear_ade @@ -501,17 +501,17 @@ NESTED(nmi_handler, PT_SIZE, sp) .macro __BUILD_silent exception .endm - /* Gas tries to parse the PRINT argument as a string containing + /* Gas tries to parse the ASM_PRINT argument as a string containing string escapes and emits bogus warnings if it believes to recognize an unknown escape code. So make the arguments start with an n and gas will believe \n is ok ... */ .macro __BUILD_verbose nexception LONG_L a1, PT_EPC(sp) #ifdef CONFIG_32BIT - PRINT("Got \nexception at %08lx\012") + ASM_PRINT("Got \nexception at %08lx\012") #endif #ifdef CONFIG_64BIT - PRINT("Got \nexception at %016lx\012") + ASM_PRINT("Got \nexception at %016lx\012") #endif .endm diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S index 351d40fe0859..3b02ffe46304 100644 --- a/arch/mips/kernel/head.S +++ b/arch/mips/kernel/head.S @@ -132,6 +132,9 @@ dtb_found: #endif MTC0 zero, CP0_CONTEXT # clear context register +#ifdef CONFIG_64BIT + MTC0 zero, CP0_XCONTEXT +#endif PTR_LA $28, init_thread_union /* Set the SP after an empty pt_regs. */ PTR_LI sp, _THREAD_SIZE - 32 - PT_SIZE diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 37f8e78e2869..5bc3b04693c7 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -180,7 +180,8 @@ void __init check_wait(void) break; case CPU_LOONGSON64: if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >= - (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)) + (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) || + (c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R) cpu_wait = r4k_wait; break; @@ -201,7 +202,7 @@ void __init check_wait(void) */ if (IS_ENABLED(CONFIG_MIPS_EJTAG_FDC_TTY)) break; - /* fall through */ + fallthrough; case CPU_M14KC: case CPU_M14KEC: case CPU_24K: diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index cdb93ed91cde..f60af512c877 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -114,14 +114,56 @@ static char *cm2_core[8] = { "Exclusive/OK", "Exclusive/Data" }; +static char *cm2_l2_type[4] = { + [0x0] = "None", + [0x1] = "Tag RAM single/double ECC error", + [0x2] = "Data RAM single/double ECC error", + [0x3] = "WS RAM uncorrectable dirty parity" +}; + +static char *cm2_l2_instr[32] = { + [0x00] = "L2_NOP", + [0x01] = "L2_ERR_CORR", + [0x02] = "L2_TAG_INV", + [0x03] = "L2_WS_CLEAN", + [0x04] = "L2_RD_MDYFY_WR", + [0x05] = "L2_WS_MRU", + [0x06] = "L2_EVICT_LN2", + [0x07] = "0x07", + [0x08] = "L2_EVICT", + [0x09] = "L2_REFL", + [0x0a] = "L2_RD", + [0x0b] = "L2_WR", + [0x0c] = "L2_EVICT_MRU", + [0x0d] = "L2_SYNC", + [0x0e] = "L2_REFL_ERR", + [0x0f] = "0x0f", + [0x10] = "L2_INDX_WB_INV", + [0x11] = "L2_INDX_LD_TAG", + [0x12] = "L2_INDX_ST_TAG", + [0x13] = "L2_INDX_ST_DATA", + [0x14] = "L2_INDX_ST_ECC", + [0x15] = "0x15", + [0x16] = "0x16", + [0x17] = "0x17", + [0x18] = "L2_FTCH_AND_LCK", + [0x19] = "L2_HIT_INV", + [0x1a] = "L2_HIT_WB_INV", + [0x1b] = "L2_HIT_WB", + [0x1c] = "0x1c", + [0x1d] = 
"0x1d", + [0x1e] = "0x1e", + [0x1f] = "0x1f" +}; + static char *cm2_causes[32] = { "None", "GC_WR_ERR", "GC_RD_ERR", "COH_WR_ERR", "COH_RD_ERR", "MMIO_WR_ERR", "MMIO_RD_ERR", "0x07", "0x08", "0x09", "0x0a", "0x0b", "0x0c", "0x0d", "0x0e", "0x0f", - "0x10", "0x11", "0x12", "0x13", - "0x14", "0x15", "0x16", "INTVN_WR_ERR", - "INTVN_RD_ERR", "0x19", "0x1a", "0x1b", + "0x10", "INTVN_WR_ERR", "INTVN_RD_ERR", "0x13", + "0x14", "0x15", "0x16", "0x17", + "L2_RD_UNCORR", "L2_WR_UNCORR", "L2_CORR", "0x1b", "0x1c", "0x1d", "0x1e", "0x1f" }; @@ -360,7 +402,7 @@ void mips_cm_error_report(void) "CCA=%lu TR=%s MCmd=%s STag=%lu " "SPort=%lu\n", cca_bits, cm2_tr[tr_bits], cm2_cmd[cmd_bits], stag_bits, sport_bits); - } else { + } else if (cause < 24) { /* glob state & sresp together */ unsigned long c3_bits = (cm_error >> 18) & 7; unsigned long c2_bits = (cm_error >> 15) & 7; @@ -377,6 +419,22 @@ void mips_cm_error_report(void) cm2_core[c1_bits], cm2_core[c0_bits], sc_bit ? "True" : "False", cm2_cmd[cmd_bits], sport_bits); + } else { + unsigned long muc_bit = (cm_error >> 23) & 1; + unsigned long ins_bits = (cm_error >> 18) & 0x1f; + unsigned long arr_bits = (cm_error >> 16) & 3; + unsigned long dw_bits = (cm_error >> 12) & 15; + unsigned long way_bits = (cm_error >> 9) & 7; + unsigned long mway_bit = (cm_error >> 8) & 1; + unsigned long syn_bits = (cm_error >> 0) & 0xFF; + + snprintf(buf, sizeof(buf), + "Type=%s%s Instr=%s DW=%lu Way=%lu " + "MWay=%s Syndrome=0x%02lx", + muc_bit ? "Multi-UC " : "", + cm2_l2_type[arr_bits], + cm2_l2_instr[ins_bits], dw_bits, way_bits, + mway_bit ? "True" : "False", syn_bits); } pr_err("CM_ERROR=%08llx %s <%s>\n", cm_error, cm2_causes[cause], buf); diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index b4d210bfcdae..a39ec755e4c2 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -1109,7 +1109,7 @@ repeat: err = SIGILL; break; } - /* fall through */ + fallthrough; case beql_op: case bnel_op: if (delay_slot(regs)) { diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 128fc9999c56..efce5defcc5c 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -90,6 +90,7 @@ struct mips_pmu { unsigned int num_counters; }; +static int counter_bits; static struct mips_pmu mipspmu; #define M_PERFCTL_EVENT(event) (((event) << MIPS_PERFCTRL_EVENT_S) & \ @@ -118,6 +119,7 @@ static struct mips_pmu mipspmu; #define M_PERFCTL_CONFIG_MASK 0x1f #endif +#define CNTR_BIT_MASK(n) (((n) == 64) ? 
~0ULL : ((1ULL<<(n))-1)) #ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS static DEFINE_RWLOCK(pmuint_rwlock); @@ -154,6 +156,31 @@ static void pause_local_counters(void); static irqreturn_t mipsxx_pmu_handle_irq(int, void *); static int mipsxx_pmu_handle_shared_irq(void); +/* 0: Not Loongson-3 + * 1: Loongson-3A1000/3B1000/3B1500 + * 2: Loongson-3A2000/3A3000 + * 3: Loongson-3A4000+ + */ + +#define LOONGSON_PMU_TYPE0 0 +#define LOONGSON_PMU_TYPE1 1 +#define LOONGSON_PMU_TYPE2 2 +#define LOONGSON_PMU_TYPE3 3 + +static inline int get_loongson3_pmu_type(void) +{ + if (boot_cpu_type() != CPU_LOONGSON64) + return LOONGSON_PMU_TYPE0; + if ((boot_cpu_data.processor_id & PRID_COMP_MASK) == PRID_COMP_LEGACY) + return LOONGSON_PMU_TYPE1; + if ((boot_cpu_data.processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64C) + return LOONGSON_PMU_TYPE2; + if ((boot_cpu_data.processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G) + return LOONGSON_PMU_TYPE3; + + return LOONGSON_PMU_TYPE0; +} + static unsigned int mipsxx_pmu_swizzle_perf_idx(unsigned int idx) { if (vpe_id() == 1) @@ -186,17 +213,18 @@ static u64 mipsxx_pmu_read_counter(unsigned int idx) static u64 mipsxx_pmu_read_counter_64(unsigned int idx) { + u64 mask = CNTR_BIT_MASK(counter_bits); idx = mipsxx_pmu_swizzle_perf_idx(idx); switch (idx) { case 0: - return read_c0_perfcntr0_64(); + return read_c0_perfcntr0_64() & mask; case 1: - return read_c0_perfcntr1_64(); + return read_c0_perfcntr1_64() & mask; case 2: - return read_c0_perfcntr2_64(); + return read_c0_perfcntr2_64() & mask; case 3: - return read_c0_perfcntr3_64(); + return read_c0_perfcntr3_64() & mask; default: WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); return 0; @@ -225,6 +253,7 @@ static void mipsxx_pmu_write_counter(unsigned int idx, u64 val) static void mipsxx_pmu_write_counter_64(unsigned int idx, u64 val) { + val &= CNTR_BIT_MASK(counter_bits); idx = mipsxx_pmu_swizzle_perf_idx(idx); switch (idx) { @@ -286,12 +315,16 @@ static int mipsxx_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { int i; + unsigned long cntr_mask; /* * We only need to care the counter mask. The range has been * checked definitely. */ - unsigned long cntr_mask = (hwc->event_base >> 8) & 0xffff; + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) + cntr_mask = (hwc->event_base >> 10) & 0xffff; + else + cntr_mask = (hwc->event_base >> 8) & 0xffff; for (i = mipspmu.num_counters - 1; i >= 0; i--) { /* @@ -320,10 +353,16 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) WARN_ON(idx < 0 || idx >= mipspmu.num_counters); - cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | - (evt->config_base & M_PERFCTL_CONFIG_MASK) | - /* Make sure interrupt enabled. */ - MIPS_PERFCTRL_IE; + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0x3ff) | + (evt->config_base & M_PERFCTL_CONFIG_MASK) | + /* Make sure interrupt enabled. */ + MIPS_PERFCTRL_IE; + else + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | + (evt->config_base & M_PERFCTL_CONFIG_MASK) | + /* Make sure interrupt enabled. 
*/ + MIPS_PERFCTRL_IE; if (IS_ENABLED(CONFIG_CPU_BMIPS5000)) { /* enable the counter for the calling thread */ @@ -396,6 +435,10 @@ static int mipspmu_event_set_period(struct perf_event *event, local64_set(&hwc->prev_count, mipspmu.overflow - left); + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) + mipsxx_pmu_write_control(idx, + M_PERFCTL_EVENT(hwc->event_base & 0x3ff)); + mipspmu.write_counter(idx, mipspmu.overflow - left); perf_event_update_userpage(event); @@ -667,8 +710,14 @@ static unsigned int mipspmu_perf_event_encode(const struct mips_perf_event *pev) (pev->event_id & 0xff); else #endif /* CONFIG_MIPS_MT_SMP */ - return ((pev->cntr_mask & 0xffff00) | - (pev->event_id & 0xff)); + { + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) + return (pev->cntr_mask & 0xfffc00) | + (pev->event_id & 0x3ff); + else + return (pev->cntr_mask & 0xffff00) | + (pev->event_id & 0xff); + } } static const struct mips_perf_event *mipspmu_map_general_event(int idx) @@ -783,26 +832,104 @@ static int n_counters(void) return counters; } +static void loongson3_reset_counters(void *arg) +{ + int counters = (int)(long)arg; + + switch (counters) { + case 4: + mipsxx_pmu_write_control(3, 0); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 127<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 191<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 255<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 319<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 383<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 575<<5); + mipspmu.write_counter(3, 0); + fallthrough; + case 3: + mipsxx_pmu_write_control(2, 0); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 127<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 191<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 255<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 319<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 383<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 575<<5); + mipspmu.write_counter(2, 0); + fallthrough; + case 2: + mipsxx_pmu_write_control(1, 0); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 127<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 191<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 255<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 319<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 383<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 575<<5); + mipspmu.write_counter(1, 0); + fallthrough; + case 1: + mipsxx_pmu_write_control(0, 0); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 127<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 191<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 255<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 319<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 383<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 575<<5); + mipspmu.write_counter(0, 0); + break; + } +} + static void reset_counters(void *arg) { int counters = (int)(long)arg; + + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) { + loongson3_reset_counters(arg); + return; + } + switch (counters) { case 4: mipsxx_pmu_write_control(3, 0); mipspmu.write_counter(3, 0); - /* fall through */ + fallthrough; case 3: mipsxx_pmu_write_control(2, 0); 
mipspmu.write_counter(2, 0); - /* fall through */ + fallthrough; case 2: mipsxx_pmu_write_control(1, 0); mipspmu.write_counter(1, 0); - /* fall through */ + fallthrough; case 1: mipsxx_pmu_write_control(0, 0); mipspmu.write_counter(0, 0); - /* fall through */ + break; } } @@ -834,13 +961,30 @@ static const struct mips_perf_event i6x00_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_BRANCH_MISSES] = { 0x16, CNTR_EVEN | CNTR_ODD }, }; -static const struct mips_perf_event loongson3_event_map[PERF_COUNT_HW_MAX] = { +static const struct mips_perf_event loongson3_event_map1[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, CNTR_ODD }, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x01, CNTR_EVEN }, [PERF_COUNT_HW_BRANCH_MISSES] = { 0x01, CNTR_ODD }, }; +static const struct mips_perf_event loongson3_event_map2[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x80, CNTR_ALL }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x81, CNTR_ALL }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x18, CNTR_ALL }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x94, CNTR_ALL }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x9c, CNTR_ALL }, +}; + +static const struct mips_perf_event loongson3_event_map3[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_ALL }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01, CNTR_ALL }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x1c, CNTR_ALL }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x1d, CNTR_ALL }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02, CNTR_ALL }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x08, CNTR_ALL }, +}; + static const struct mips_perf_event octeon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x01, CNTR_ALL }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x03, CNTR_ALL }, @@ -1064,7 +1208,7 @@ static const struct mips_perf_event i6x00_cache_map }, }; -static const struct mips_perf_event loongson3_cache_map +static const struct mips_perf_event loongson3_cache_map1 [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -1109,12 +1253,127 @@ static const struct mips_perf_event loongson3_cache_map [C(BPU)] = { /* Using the same code for *HW_BRANCH* */ [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { 0x02, CNTR_EVEN }, - [C(RESULT_MISS)] = { 0x02, CNTR_ODD }, + [C(RESULT_ACCESS)] = { 0x01, CNTR_EVEN }, + [C(RESULT_MISS)] = { 0x01, CNTR_ODD }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x01, CNTR_EVEN }, + [C(RESULT_MISS)] = { 0x01, CNTR_ODD }, + }, +}, +}; + +static const struct mips_perf_event loongson3_cache_map2 + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + /* + * Like some other architectures (e.g. ARM), the performance + * counters don't differentiate between read and write + * accesses/misses, so this isn't strictly correct, but it's the + * best we can do. Writes and reads get combined. 
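The Loongson-3 support added in this file widens the raw event id from 8 to 10 bits on the 3A2000/3A3000 family, which moves the counter mask up by two bits in the packed event value. A stand-alone sketch of the two layouts follows; the struct and helper names are mine, not the driver's:

/* Minimal sketch: how a packed event value splits into counter mask and
 * event id for the two encodings used above. */
struct decoded_event {
	unsigned int cntr_mask;	/* one bit per counter the event may use   */
	unsigned int event_id;	/* value programmed into the control field */
};

static struct decoded_event decode_event_base(unsigned int event_base,
					      int wide_event_id)
{
	struct decoded_event d;

	if (wide_event_id) {		/* Loongson-3A2000/3A3000: 10-bit ids */
		d.cntr_mask = (event_base >> 10) & 0xffff;
		d.event_id  = event_base & 0x3ff;
	} else {			/* classic 7/8-bit event ids */
		d.cntr_mask = (event_base >> 8) & 0xffff;
		d.event_id  = event_base & 0xff;
	}
	return d;
}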
+ */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x156, CNTR_ALL }, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = { 0x02, CNTR_EVEN }, - [C(RESULT_MISS)] = { 0x02, CNTR_ODD }, + [C(RESULT_ACCESS)] = { 0x155, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x153, CNTR_ALL }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x18, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_MISS)] = { 0x18, CNTR_ALL }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x1b6, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x1b7, CNTR_ALL }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { 0x1bf, CNTR_ALL }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x92, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_MISS)] = { 0x92, CNTR_ALL }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x1a, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_MISS)] = { 0x1a, CNTR_ALL }, + }, +}, +[C(BPU)] = { + /* Using the same code for *HW_BRANCH* */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x94, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x9c, CNTR_ALL }, + }, +}, +}; + +static const struct mips_perf_event loongson3_cache_map3 + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + /* + * Like some other architectures (e.g. ARM), the performance + * counters don't differentiate between read and write + * accesses/misses, so this isn't strictly correct, but it's the + * best we can do. Writes and reads get combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x1e, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x1f, CNTR_ALL }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { 0xaa, CNTR_ALL }, + [C(RESULT_MISS)] = { 0xa9, CNTR_ALL }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x1c, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x1d, CNTR_ALL }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x2e, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x2f, CNTR_ALL }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x14, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x1b, CNTR_ALL }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x1a, CNTR_ALL }, + }, +}, +[C(BPU)] = { + /* Using the same code for *HW_BRANCH* */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x02, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x08, CNTR_ALL }, }, }, }; @@ -1178,7 +1437,6 @@ static const struct mips_perf_event bmips5000_cache_map }, }; - static const struct mips_perf_event octeon_cache_map [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -1512,6 +1770,7 @@ static irqreturn_t mipsxx_pmu_handle_irq(int irq, void *dev) static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) { /* currently most cores have 7-bit event numbers */ + int pmu_type; unsigned int raw_id = config & 0xff; unsigned int base_id = raw_id & 0x7f; @@ -1624,8 +1883,33 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) raw_id > 127 ? CNTR_ODD : CNTR_EVEN; break; case CPU_LOONGSON64: - raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN; - break; + pmu_type = get_loongson3_pmu_type(); + + switch (pmu_type) { + case LOONGSON_PMU_TYPE1: + raw_event.cntr_mask = + raw_id > 127 ? 
CNTR_ODD : CNTR_EVEN; + break; + case LOONGSON_PMU_TYPE2: + base_id = config & 0x3ff; + raw_event.cntr_mask = CNTR_ALL; + + if ((base_id >= 1 && base_id < 28) || + (base_id >= 64 && base_id < 90) || + (base_id >= 128 && base_id < 164) || + (base_id >= 192 && base_id < 200) || + (base_id >= 256 && base_id < 274) || + (base_id >= 320 && base_id < 358) || + (base_id >= 384 && base_id < 574)) + break; + + return ERR_PTR(-EOPNOTSUPP); + case LOONGSON_PMU_TYPE3: + base_id = raw_id; + raw_event.cntr_mask = CNTR_ALL; + break; + } + break; } raw_event.event_id = base_id; @@ -1683,8 +1967,7 @@ static const struct mips_perf_event *xlp_pmu_map_raw_event(u64 config) static int __init init_hw_perf_events(void) { - int counters, irq; - int counter_bits; + int counters, irq, pmu_type; pr_info("Performance counters: "); @@ -1771,8 +2054,25 @@ init_hw_perf_events(void) break; case CPU_LOONGSON64: mipspmu.name = "mips/loongson3"; - mipspmu.general_event_map = &loongson3_event_map; - mipspmu.cache_event_map = &loongson3_cache_map; + pmu_type = get_loongson3_pmu_type(); + + switch (pmu_type) { + case LOONGSON_PMU_TYPE1: + counters = 2; + mipspmu.general_event_map = &loongson3_event_map1; + mipspmu.cache_event_map = &loongson3_cache_map1; + break; + case LOONGSON_PMU_TYPE2: + counters = 4; + mipspmu.general_event_map = &loongson3_event_map2; + mipspmu.cache_event_map = &loongson3_cache_map2; + break; + case LOONGSON_PMU_TYPE3: + counters = 4; + mipspmu.general_event_map = &loongson3_event_map3; + mipspmu.cache_event_map = &loongson3_cache_map3; + break; + } break; case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: @@ -1803,19 +2103,26 @@ init_hw_perf_events(void) mipspmu.irq = irq; if (read_c0_perfctrl0() & MIPS_PERFCTRL_W) { - mipspmu.max_period = (1ULL << 63) - 1; - mipspmu.valid_count = (1ULL << 63) - 1; - mipspmu.overflow = 1ULL << 63; + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) { + counter_bits = 48; + mipspmu.max_period = (1ULL << 47) - 1; + mipspmu.valid_count = (1ULL << 47) - 1; + mipspmu.overflow = 1ULL << 47; + } else { + counter_bits = 64; + mipspmu.max_period = (1ULL << 63) - 1; + mipspmu.valid_count = (1ULL << 63) - 1; + mipspmu.overflow = 1ULL << 63; + } mipspmu.read_counter = mipsxx_pmu_read_counter_64; mipspmu.write_counter = mipsxx_pmu_write_counter_64; - counter_bits = 64; } else { + counter_bits = 32; mipspmu.max_period = (1ULL << 31) - 1; mipspmu.valid_count = (1ULL << 31) - 1; mipspmu.overflow = 1ULL << 31; mipspmu.read_counter = mipsxx_pmu_read_counter; mipspmu.write_counter = mipsxx_pmu_write_counter; - counter_bits = 32; } on_each_cpu(reset_counters, (void *)(long)counters, 1); diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index f8d36710cd58..4184d641f05e 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -98,12 +98,16 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "%s", " mips32r1"); if (cpu_has_mips32r2) seq_printf(m, "%s", " mips32r2"); + if (cpu_has_mips32r5) + seq_printf(m, "%s", " mips32r5"); if (cpu_has_mips32r6) seq_printf(m, "%s", " mips32r6"); if (cpu_has_mips64r1) seq_printf(m, "%s", " mips64r1"); if (cpu_has_mips64r2) seq_printf(m, "%s", " mips64r2"); + if (cpu_has_mips64r5) + seq_printf(m, "%s", " mips64r5"); if (cpu_has_mips64r6) seq_printf(m, "%s", " mips64r6"); seq_printf(m, "\n"); diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index 59be5c812aa2..b91e91106475 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -41,7 +41,7 @@ LEAF(_save_fp) 
EXPORT_SYMBOL(_save_fp) #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ - defined(CONFIG_CPU_MIPSR6) + defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) mfc0 t0, CP0_STATUS #endif fpu_save_double a0 t0 t1 # clobbers t1 @@ -53,7 +53,7 @@ EXPORT_SYMBOL(_save_fp) */ LEAF(_restore_fp) #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ - defined(CONFIG_CPU_MIPSR6) + defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) mfc0 t0, CP0_STATUS #endif fpu_restore_double a0 t0 t1 # clobbers t1 @@ -103,10 +103,10 @@ LEAF(_save_fp_context) .set pop #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ - defined(CONFIG_CPU_MIPSR6) + defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) .set push SET_HARDFLOAT -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) .set mips32r2 .set fp=64 mfc0 t0, CP0_STATUS @@ -170,11 +170,11 @@ LEAF(_save_fp_context) LEAF(_restore_fp_context) EX lw t1, 0(a1) -#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ - defined(CONFIG_CPU_MIPSR6) +#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ + defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) .set push SET_HARDFLOAT -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) .set mips32r2 .set fp=64 mfc0 t0, CP0_STATUS diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 41df8221bb8f..50c9a57e0d3a 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -41,7 +41,7 @@ NESTED(handle_sys, PT_SIZE, sp) #if 0 SAVE_ALL move a1, v0 - PRINT("Scall %ld\n") + ASM_PRINT("Scall %ld\n") RESTORE_ALL #endif diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 10bef8f78e7c..7b537fa2035d 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -575,7 +575,7 @@ static int __init bootcmdline_scan_chosen(unsigned long node, const char *uname, #endif /* CONFIG_OF_EARLY_FLATTREE */ -static void __init bootcmdline_init(char **cmdline_p) +static void __init bootcmdline_init(void) { bool dt_bootargs = false; @@ -654,13 +654,11 @@ static void __init bootcmdline_init(char **cmdline_p) */ static void __init arch_mem_init(char **cmdline_p) { - extern void plat_mem_setup(void); - /* call board setup routine */ plat_mem_setup(); memblock_set_bottom_up(true); - bootcmdline_init(cmdline_p); + bootcmdline_init(); strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; @@ -702,7 +700,17 @@ static void __init arch_mem_init(char **cmdline_p) memblock_reserve(crashk_res.start, resource_size(&crashk_res)); #endif device_tree_init(); + + /* + * In order to reduce the possibility of kernel panic when failed to + * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate + * low memory as small as possible before plat_swiotlb_setup(), so + * make sparse_init() using top-down allocation. + */ + memblock_set_bottom_up(false); sparse_init(); + memblock_set_bottom_up(true); + plat_swiotlb_setup(); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); @@ -831,7 +839,7 @@ arch_initcall(debugfs_mips); /* User defined DMA coherency from command line. */ enum coherent_io_user_state coherentio = IO_COHERENCE_DEFAULT; EXPORT_SYMBOL_GPL(coherentio); -int hw_coherentio = 0; /* Actual hardware supported DMA coherency setting. */ +int hw_coherentio; /* Actual hardware supported DMA coherency setting. 
*/ static int __init setcoherentio(char *str) { diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index f6efabcb4e92..a0262729cd4c 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -52,7 +52,7 @@ struct sigframe { /* Matches struct ucontext from its uc_mcontext field onwards */ struct sigcontext sf_sc; sigset_t sf_mask; - unsigned long long sf_extcontext[0]; + unsigned long long sf_extcontext[]; }; struct rt_sigframe { @@ -824,7 +824,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) regs->regs[2] = EINTR; break; } - /* fallthrough */ + fallthrough; case ERESTARTNOINTR: regs->regs[7] = regs->regs[26]; regs->regs[2] = regs->regs[0]; diff --git a/arch/mips/kernel/spram.c b/arch/mips/kernel/spram.c index 26d355462ace..d5d96214cce5 100644 --- a/arch/mips/kernel/spram.c +++ b/arch/mips/kernel/spram.c @@ -209,11 +209,11 @@ void spram_config(void) case CPU_P6600: config0 = read_c0_config(); /* FIXME: addresses are Malta specific */ - if (config0 & (1<<24)) { + if (config0 & MIPS_CONF_ISP) { probe_spram("ISPRAM", 0x1c000000, &ispram_load_tag, &ispram_store_tag); } - if (config0 & (1<<23)) + if (config0 & MIPS_CONF_DSP) probe_spram("DSPRAM", 0x1c100000, &dspram_load_tag, &dspram_store_tag); } diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index 37e9413a393d..caa01457dce6 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -18,12 +18,82 @@ #include <linux/smp.h> #include <linux/spinlock.h> #include <linux/export.h> +#include <linux/cpufreq.h> +#include <linux/delay.h> #include <asm/cpu-features.h> #include <asm/cpu-type.h> #include <asm/div64.h> #include <asm/time.h> +#ifdef CONFIG_CPU_FREQ + +static DEFINE_PER_CPU(unsigned long, pcp_lpj_ref); +static DEFINE_PER_CPU(unsigned long, pcp_lpj_ref_freq); +static unsigned long glb_lpj_ref; +static unsigned long glb_lpj_ref_freq; + +static int cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpumask *cpus = freq->policy->cpus; + unsigned long lpj; + int cpu; + + /* + * Skip lpj numbers adjustment if the CPU-freq transition is safe for + * the loops delay. (Is this possible?) + */ + if (freq->flags & CPUFREQ_CONST_LOOPS) + return NOTIFY_OK; + + /* Save the initial values of the lpjes for future scaling. */ + if (!glb_lpj_ref) { + glb_lpj_ref = boot_cpu_data.udelay_val; + glb_lpj_ref_freq = freq->old; + + for_each_online_cpu(cpu) { + per_cpu(pcp_lpj_ref, cpu) = + cpu_data[cpu].udelay_val; + per_cpu(pcp_lpj_ref_freq, cpu) = freq->old; + } + } + + /* + * Adjust global lpj variable and per-CPU udelay_val number in + * accordance with the new CPU frequency. 
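The adjustment described in that comment is plain proportionality: a delay-loop count calibrated at one frequency is multiplied by new_freq/ref_freq. A stand-alone sketch, using simple 64-bit math in place of the kernel's cpufreq_scale() helper and a function name of my own:

#include <stdint.h>

/* Sketch: scale a loops-per-jiffy value calibrated at ref_khz so that
 * delay loops stay accurate once the CPU runs at new_khz. */
static unsigned long scale_lpj(unsigned long lpj_ref,
			       unsigned long ref_khz, unsigned long new_khz)
{
	return (unsigned long)(((uint64_t)lpj_ref * new_khz) / ref_khz);
}

/* E.g. lpj calibrated at 800 MHz, CPU about to run at 1.2 GHz:
 *   scale_lpj(3990000, 800000, 1200000) == 5985000
 * Scaling up before the frequency rises (PRECHANGE) and down only after it
 * has dropped (POSTCHANGE) ensures delays never become too short. */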
+ */ + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + loops_per_jiffy = cpufreq_scale(glb_lpj_ref, + glb_lpj_ref_freq, + freq->new); + + for_each_cpu(cpu, cpus) { + lpj = cpufreq_scale(per_cpu(pcp_lpj_ref, cpu), + per_cpu(pcp_lpj_ref_freq, cpu), + freq->new); + cpu_data[cpu].udelay_val = (unsigned int)lpj; + } + } + + return NOTIFY_OK; +} + +static struct notifier_block cpufreq_notifier = { + .notifier_call = cpufreq_callback, +}; + +static int __init register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); +} +core_initcall(register_cpufreq_notifier); + +#endif /* CONFIG_CPU_FREQ */ + /* * forward reference */ diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 31968cbd6464..22f805a73921 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -71,6 +71,8 @@ #include <asm/tlbex.h> #include <asm/uasm.h> +#include <asm/mach-loongson64/cpucfg-emul.h> + extern void check_wait(void); extern asmlinkage void rollback_handle_int(void); extern asmlinkage void handle_int(void); @@ -693,6 +695,48 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode) return -1; /* Must be something else ... */ } +/* + * Loongson-3 CSR instructions emulation + */ + +#ifdef CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION + +#define LWC2 0xc8000000 +#define RS BASE +#define CSR_OPCODE2 0x00000118 +#define CSR_OPCODE2_MASK 0x000007ff +#define CSR_FUNC_MASK RT +#define CSR_FUNC_CPUCFG 0x8 + +static int simulate_loongson3_cpucfg(struct pt_regs *regs, + unsigned int opcode) +{ + int op = opcode & OPCODE; + int op2 = opcode & CSR_OPCODE2_MASK; + int csr_func = (opcode & CSR_FUNC_MASK) >> 16; + + if (op == LWC2 && op2 == CSR_OPCODE2 && csr_func == CSR_FUNC_CPUCFG) { + int rd = (opcode & RD) >> 11; + int rs = (opcode & RS) >> 21; + __u64 sel = regs->regs[rs]; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + + /* Do not emulate on unsupported core models. */ + if (!loongson3_cpucfg_emulation_enabled(¤t_cpu_data)) + return -1; + + regs->regs[rd] = loongson3_cpucfg_read_synthesized( + ¤t_cpu_data, sel); + + return 0; + } + + /* Not ours. */ + return -1; +} +#endif /* CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION */ + asmlinkage void do_ov(struct pt_regs *regs) { enum ctx_state prev_state; @@ -1166,6 +1210,11 @@ no_r2_instr: if (status < 0) status = simulate_fp(regs, opcode, old_epc, old31); + +#ifdef CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION + if (status < 0) + status = simulate_loongson3_cpucfg(regs, opcode); +#endif } else if (cpu_has_mmips) { unsigned short mmop[2] = { 0 }; @@ -1401,8 +1450,7 @@ asmlinkage void do_cpu(struct pt_regs *regs) force_sig(SIGILL); break; } - /* Fall through. 
*/ - + fallthrough; case 1: { void __user *fault_addr; unsigned long fcr31; diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index ca6fc4762d97..0adce604fa44 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -89,12 +89,10 @@ #include <asm/fpu.h> #include <asm/fpu_emulator.h> #include <asm/inst.h> +#include <asm/unaligned-emul.h> #include <asm/mmu_context.h> #include <linux/uaccess.h> -#define STR(x) __STR(x) -#define __STR(x) #x - enum { UNALIGNED_ACTION_QUIET, UNALIGNED_ACTION_SIGNAL, @@ -108,778 +106,6 @@ static u32 unaligned_action; #endif extern void show_registers(struct pt_regs *regs); -#ifdef __BIG_ENDIAN -#define _LoadHW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ (".set\tnoat\n" \ - "1:\t"type##_lb("%0", "0(%2)")"\n" \ - "2:\t"type##_lbu("$1", "1(%2)")"\n\t"\ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - "3:\t.set\tat\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#ifndef CONFIG_CPU_NO_LOAD_STORE_LR -#define _LoadW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - "1:\t"type##_lwl("%0", "(%2)")"\n" \ - "2:\t"type##_lwr("%0", "3(%2)")"\n\t"\ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ -/* For CPUs without lwl instruction */ -#define _LoadW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n" \ - ".set\tnoat\n\t" \ - "1:"type##_lb("%0", "0(%2)")"\n\t" \ - "2:"type##_lbu("$1", "1(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "3:"type##_lbu("$1", "2(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "4:"type##_lbu("$1", "3(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - ".set\tpop\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%1, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ - -#define _LoadHWU(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tnoat\n" \ - "1:\t"type##_lbu("%0", "0(%2)")"\n" \ - "2:\t"type##_lbu("$1", "1(%2)")"\n\t"\ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".set\tat\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#ifndef CONFIG_CPU_NO_LOAD_STORE_LR -#define _LoadWU(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - "1:\t"type##_lwl("%0", "(%2)")"\n" \ - "2:\t"type##_lwr("%0", "3(%2)")"\n\t"\ - "dsll\t%0, %0, 32\n\t" \ - "dsrl\t%0, %0, 32\n\t" \ - "li\t%1, 0\n" \ - "3:\n\t" \ - 
".insn\n\t" \ - "\t.section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#define _LoadDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - "1:\tldl\t%0, (%2)\n" \ - "2:\tldr\t%0, 7(%2)\n\t" \ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - "\t.section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ -/* For CPUs without lwl and ldl instructions */ -#define _LoadWU(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:"type##_lbu("%0", "0(%2)")"\n\t" \ - "2:"type##_lbu("$1", "1(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "3:"type##_lbu("$1", "2(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "4:"type##_lbu("$1", "3(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - ".set\tpop\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%1, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#define _LoadDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:lb\t%0, 0(%2)\n\t" \ - "2:lbu\t $1, 1(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "3:lbu\t$1, 2(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "4:lbu\t$1, 3(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "5:lbu\t$1, 4(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "6:lbu\t$1, 5(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "7:lbu\t$1, 6(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "8:lbu\t$1, 7(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - ".set\tpop\n\t" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%1, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ - - -#define _StoreHW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tnoat\n" \ - "1:\t"type##_sb("%1", "1(%2)")"\n" \ - "srl\t$1, %1, 0x8\n" \ - "2:\t"type##_sb("$1", "0(%2)")"\n" \ - ".set\tat\n\t" \ - "li\t%0, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%0, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT));\ -} while(0) - -#ifndef CONFIG_CPU_NO_LOAD_STORE_LR -#define _StoreW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - 
"1:\t"type##_swl("%1", "(%2)")"\n" \ - "2:\t"type##_swr("%1", "3(%2)")"\n\t"\ - "li\t%0, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%0, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#define _StoreDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - "1:\tsdl\t%1,(%2)\n" \ - "2:\tsdr\t%1, 7(%2)\n\t" \ - "li\t%0, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%0, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ -#define _StoreW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:"type##_sb("%1", "3(%2)")"\n\t" \ - "srl\t$1, %1, 0x8\n\t" \ - "2:"type##_sb("$1", "2(%2)")"\n\t" \ - "srl\t$1, $1, 0x8\n\t" \ - "3:"type##_sb("$1", "1(%2)")"\n\t" \ - "srl\t$1, $1, 0x8\n\t" \ - "4:"type##_sb("$1", "0(%2)")"\n\t" \ - ".set\tpop\n\t" \ - "li\t%0, 0\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%0, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - ".previous" \ - : "=&r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT) \ - : "memory"); \ -} while(0) - -#define _StoreDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:sb\t%1, 7(%2)\n\t" \ - "dsrl\t$1, %1, 0x8\n\t" \ - "2:sb\t$1, 6(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "3:sb\t$1, 5(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "4:sb\t$1, 4(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "5:sb\t$1, 3(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "6:sb\t$1, 2(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "7:sb\t$1, 1(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "8:sb\t$1, 0(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - ".set\tpop\n\t" \ - "li\t%0, 0\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%0, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ - ".previous" \ - : "=&r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT) \ - : "memory"); \ -} while(0) - -#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ - -#else /* __BIG_ENDIAN */ - -#define _LoadHW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ (".set\tnoat\n" \ - "1:\t"type##_lb("%0", "1(%2)")"\n" \ - "2:\t"type##_lbu("$1", "0(%2)")"\n\t"\ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - "3:\t.set\tat\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#ifndef CONFIG_CPU_NO_LOAD_STORE_LR -#define _LoadW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - "1:\t"type##_lwl("%0", "3(%2)")"\n" \ - 
"2:\t"type##_lwr("%0", "(%2)")"\n\t"\ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ -/* For CPUs without lwl instruction */ -#define _LoadW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n" \ - ".set\tnoat\n\t" \ - "1:"type##_lb("%0", "3(%2)")"\n\t" \ - "2:"type##_lbu("$1", "2(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "3:"type##_lbu("$1", "1(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "4:"type##_lbu("$1", "0(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - ".set\tpop\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%1, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ - - -#define _LoadHWU(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tnoat\n" \ - "1:\t"type##_lbu("%0", "1(%2)")"\n" \ - "2:\t"type##_lbu("$1", "0(%2)")"\n\t"\ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".set\tat\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#ifndef CONFIG_CPU_NO_LOAD_STORE_LR -#define _LoadWU(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - "1:\t"type##_lwl("%0", "3(%2)")"\n" \ - "2:\t"type##_lwr("%0", "(%2)")"\n\t"\ - "dsll\t%0, %0, 32\n\t" \ - "dsrl\t%0, %0, 32\n\t" \ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - "\t.section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#define _LoadDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - "1:\tldl\t%0, 7(%2)\n" \ - "2:\tldr\t%0, (%2)\n\t" \ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - "\t.section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ -/* For CPUs without lwl and ldl instructions */ -#define _LoadWU(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:"type##_lbu("%0", "3(%2)")"\n\t" \ - "2:"type##_lbu("$1", "2(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "3:"type##_lbu("$1", "1(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "4:"type##_lbu("$1", "0(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - ".set\tpop\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%1, %3\n\t" \ - 
"j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#define _LoadDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:lb\t%0, 7(%2)\n\t" \ - "2:lbu\t$1, 6(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "3:lbu\t$1, 5(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "4:lbu\t$1, 4(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "5:lbu\t$1, 3(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "6:lbu\t$1, 2(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "7:lbu\t$1, 1(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "8:lbu\t$1, 0(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - ".set\tpop\n\t" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%1, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) -#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ - -#define _StoreHW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tnoat\n" \ - "1:\t"type##_sb("%1", "0(%2)")"\n" \ - "srl\t$1,%1, 0x8\n" \ - "2:\t"type##_sb("$1", "1(%2)")"\n" \ - ".set\tat\n\t" \ - "li\t%0, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%0, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT));\ -} while(0) - -#ifndef CONFIG_CPU_NO_LOAD_STORE_LR -#define _StoreW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - "1:\t"type##_swl("%1", "3(%2)")"\n" \ - "2:\t"type##_swr("%1", "(%2)")"\n\t"\ - "li\t%0, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%0, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#define _StoreDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - "1:\tsdl\t%1, 7(%2)\n" \ - "2:\tsdr\t%1, (%2)\n\t" \ - "li\t%0, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%0, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ -/* For CPUs without swl and sdl instructions */ -#define _StoreW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:"type##_sb("%1", "0(%2)")"\n\t" \ - "srl\t$1, %1, 0x8\n\t" \ - "2:"type##_sb("$1", "1(%2)")"\n\t" \ - "srl\t$1, $1, 0x8\n\t" \ - "3:"type##_sb("$1", "2(%2)")"\n\t" \ - "srl\t$1, $1, 0x8\n\t" \ - "4:"type##_sb("$1", "3(%2)")"\n\t" \ - ".set\tpop\n\t" \ - "li\t%0, 0\n" \ - "10:\n\t" \ - ".insn\n\t" \ - 
".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%0, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - ".previous" \ - : "=&r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT) \ - : "memory"); \ -} while(0) - -#define _StoreDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:sb\t%1, 0(%2)\n\t" \ - "dsrl\t$1, %1, 0x8\n\t" \ - "2:sb\t$1, 1(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "3:sb\t$1, 2(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "4:sb\t$1, 3(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "5:sb\t$1, 4(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "6:sb\t$1, 5(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "7:sb\t$1, 6(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "8:sb\t$1, 7(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - ".set\tpop\n\t" \ - "li\t%0, 0\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%0, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ - ".previous" \ - : "=&r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT) \ - : "memory"); \ -} while(0) - -#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ -#endif - -#define LoadHWU(addr, value, res) _LoadHWU(addr, value, res, kernel) -#define LoadHWUE(addr, value, res) _LoadHWU(addr, value, res, user) -#define LoadWU(addr, value, res) _LoadWU(addr, value, res, kernel) -#define LoadWUE(addr, value, res) _LoadWU(addr, value, res, user) -#define LoadHW(addr, value, res) _LoadHW(addr, value, res, kernel) -#define LoadHWE(addr, value, res) _LoadHW(addr, value, res, user) -#define LoadW(addr, value, res) _LoadW(addr, value, res, kernel) -#define LoadWE(addr, value, res) _LoadW(addr, value, res, user) -#define LoadDW(addr, value, res) _LoadDW(addr, value, res) - -#define StoreHW(addr, value, res) _StoreHW(addr, value, res, kernel) -#define StoreHWE(addr, value, res) _StoreHW(addr, value, res, user) -#define StoreW(addr, value, res) _StoreW(addr, value, res, kernel) -#define StoreWE(addr, value, res) _StoreW(addr, value, res, user) -#define StoreDW(addr, value, res) _StoreDW(addr, value, res) - static void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int __user *pc) { diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index a5f00ec73ea6..f185a85a27c1 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -55,7 +55,7 @@ SECTIONS /* . = 0xa800000000300000; */ . = 0xffffffff80300000; #endif - . = VMLINUX_LOAD_ADDRESS; + . 
= LINKER_LOAD_ADDRESS; /* read-only */ _text = .; /* Text and read-only data */ .text : { diff --git a/arch/mips/kernel/watch.c b/arch/mips/kernel/watch.c index ba73b4077668..c9263b95cb2e 100644 --- a/arch/mips/kernel/watch.c +++ b/arch/mips/kernel/watch.c @@ -27,15 +27,15 @@ void mips_install_watch_registers(struct task_struct *t) case 4: write_c0_watchlo3(watches->watchlo[3]); write_c0_watchhi3(watchhi | watches->watchhi[3]); - /* fall through */ + fallthrough; case 3: write_c0_watchlo2(watches->watchlo[2]); write_c0_watchhi2(watchhi | watches->watchhi[2]); - /* fall through */ + fallthrough; case 2: write_c0_watchlo1(watches->watchlo[1]); write_c0_watchhi1(watchhi | watches->watchhi[1]); - /* fall through */ + fallthrough; case 1: write_c0_watchlo0(watches->watchlo[0]); write_c0_watchhi0(watchhi | watches->watchhi[0]); @@ -58,13 +58,13 @@ void mips_read_watch_registers(void) BUG(); case 4: watches->watchhi[3] = (read_c0_watchhi3() & watchhi_mask); - /* fall through */ + fallthrough; case 3: watches->watchhi[2] = (read_c0_watchhi2() & watchhi_mask); - /* fall through */ + fallthrough; case 2: watches->watchhi[1] = (read_c0_watchhi1() & watchhi_mask); - /* fall through */ + fallthrough; case 1: watches->watchhi[0] = (read_c0_watchhi0() & watchhi_mask); } @@ -91,25 +91,25 @@ void mips_clear_watch_registers(void) BUG(); case 8: write_c0_watchlo7(0); - /* fall through */ + fallthrough; case 7: write_c0_watchlo6(0); - /* fall through */ + fallthrough; case 6: write_c0_watchlo5(0); - /* fall through */ + fallthrough; case 5: write_c0_watchlo4(0); - /* fall through */ + fallthrough; case 4: write_c0_watchlo3(0); - /* fall through */ + fallthrough; case 3: write_c0_watchlo2(0); - /* fall through */ + fallthrough; case 2: write_c0_watchlo1(0); - /* fall through */ + fallthrough; case 1: write_c0_watchlo0(0); } diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 754094b40a75..7ccf9b096783 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -64,7 +64,7 @@ static int kvm_compute_return_epc(struct kvm_vcpu *vcpu, unsigned long instpc, switch (insn.r_format.func) { case jalr_op: arch->gprs[insn.r_format.rd] = epc + 8; - /* Fall through */ + fallthrough; case jr_op: nextpc = arch->gprs[insn.r_format.rs]; break; @@ -140,7 +140,7 @@ static int kvm_compute_return_epc(struct kvm_vcpu *vcpu, unsigned long instpc, /* These are unconditional and in j_format. 
*/ case jal_op: arch->gprs[31] = instpc + 8; - /* fall through */ + fallthrough; case j_op: epc += 4; epc >>= 28; @@ -1724,14 +1724,14 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, case lhu_op: vcpu->mmio_needed = 1; /* unsigned */ - /* fall through */ + fallthrough; case lh_op: run->mmio.len = 2; break; case lbu_op: vcpu->mmio_needed = 1; /* unsigned */ - /* fall through */ + fallthrough; case lb_op: run->mmio.len = 1; break; @@ -1790,7 +1790,7 @@ static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long), return EMULATE_EXCEPT; default: break; - }; + } } } @@ -1965,7 +1965,7 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc, break; default: goto unknown; - }; + } break; unknown: #endif diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 8f05dd0a0f4e..3b0148c99c0d 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -39,40 +39,41 @@ #define VECTORSPACING 0x100 /* for EI/VI mode */ #endif -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x) struct kvm_stats_debugfs_item debugfs_entries[] = { - { "wait", VCPU_STAT(wait_exits), KVM_STAT_VCPU }, - { "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU }, - { "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU }, - { "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU }, - { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, - { "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU }, - { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU }, - { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU }, - { "addrerr_st", VCPU_STAT(addrerr_st_exits), KVM_STAT_VCPU }, - { "addrerr_ld", VCPU_STAT(addrerr_ld_exits), KVM_STAT_VCPU }, - { "syscall", VCPU_STAT(syscall_exits), KVM_STAT_VCPU }, - { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, - { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, - { "trap_inst", VCPU_STAT(trap_inst_exits), KVM_STAT_VCPU }, - { "msa_fpe", VCPU_STAT(msa_fpe_exits), KVM_STAT_VCPU }, - { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU }, - { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU }, - { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, + VCPU_STAT("wait", wait_exits), + VCPU_STAT("cache", cache_exits), + VCPU_STAT("signal", signal_exits), + VCPU_STAT("interrupt", int_exits), + VCPU_STAT("cop_unusable", cop_unusable_exits), + VCPU_STAT("tlbmod", tlbmod_exits), + VCPU_STAT("tlbmiss_ld", tlbmiss_ld_exits), + VCPU_STAT("tlbmiss_st", tlbmiss_st_exits), + VCPU_STAT("addrerr_st", addrerr_st_exits), + VCPU_STAT("addrerr_ld", addrerr_ld_exits), + VCPU_STAT("syscall", syscall_exits), + VCPU_STAT("resvd_inst", resvd_inst_exits), + VCPU_STAT("break_inst", break_inst_exits), + VCPU_STAT("trap_inst", trap_inst_exits), + VCPU_STAT("msa_fpe", msa_fpe_exits), + VCPU_STAT("fpe", fpe_exits), + VCPU_STAT("msa_disabled", msa_disabled_exits), + VCPU_STAT("flush_dcache", flush_dcache_exits), #ifdef CONFIG_KVM_MIPS_VZ - { "vz_gpsi", VCPU_STAT(vz_gpsi_exits), KVM_STAT_VCPU }, - { "vz_gsfc", VCPU_STAT(vz_gsfc_exits), KVM_STAT_VCPU }, - { "vz_hc", VCPU_STAT(vz_hc_exits), KVM_STAT_VCPU }, - { "vz_grr", VCPU_STAT(vz_grr_exits), KVM_STAT_VCPU }, - { "vz_gva", VCPU_STAT(vz_gva_exits), KVM_STAT_VCPU }, - { "vz_ghfc", VCPU_STAT(vz_ghfc_exits), KVM_STAT_VCPU }, - { "vz_gpa", VCPU_STAT(vz_gpa_exits), KVM_STAT_VCPU }, - { "vz_resvd", VCPU_STAT(vz_resvd_exits), KVM_STAT_VCPU }, + VCPU_STAT("vz_gpsi", vz_gpsi_exits), + VCPU_STAT("vz_gsfc", vz_gsfc_exits), + VCPU_STAT("vz_hc", vz_hc_exits), + VCPU_STAT("vz_grr", 
vz_grr_exits), + VCPU_STAT("vz_gva", vz_gva_exits), + VCPU_STAT("vz_ghfc", vz_ghfc_exits), + VCPU_STAT("vz_gpa", vz_gpa_exits), + VCPU_STAT("vz_resvd", vz_resvd_exits), #endif - { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU }, - { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), {NULL} }; @@ -80,13 +81,13 @@ bool kvm_trace_guest_mode_change; int kvm_guest_mode_change_trace_reg(void) { - kvm_trace_guest_mode_change = 1; + kvm_trace_guest_mode_change = true; return 0; } void kvm_guest_mode_change_trace_unreg(void) { - kvm_trace_guest_mode_change = 0; + kvm_trace_guest_mode_change = false; } /* @@ -284,8 +285,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) kvm_mips_callbacks->queue_timer_int(vcpu); vcpu->arch.wait = 0; - if (swq_has_sleeper(&vcpu->wq)) - swake_up_one(&vcpu->wq); + rcuwait_wake_up(&vcpu->wait); return kvm_mips_count_timeout(vcpu); } @@ -439,8 +439,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, return -ENOIOCTLCMD; } -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; int r = -EINTR; vcpu_load(vcpu); @@ -511,8 +512,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, dvcpu->arch.wait = 0; - if (swq_has_sleeper(&dvcpu->wq)) - swake_up_one(&dvcpu->wq); + rcuwait_wake_up(&dvcpu->wait); return 0; } diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index 7cd92166a0b9..5d436c5216cc 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -469,7 +469,7 @@ void kvm_vz_local_flush_guesttlb_all(void) cvmmemctl2 |= CVMMEMCTL2_INHIBITTS; write_c0_cvmmemctl2(cvmmemctl2); break; - }; + } /* Invalidate guest entries in guest TLB */ write_gc0_entrylo0(0); @@ -486,7 +486,7 @@ void kvm_vz_local_flush_guesttlb_all(void) if (cvmmemctl2) { cvmmemctl2 &= ~CVMMEMCTL2_INHIBITTS; write_c0_cvmmemctl2(cvmmemctl2); - }; + } write_gc0_index(old_index); write_gc0_entryhi(old_entryhi); diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index dde20887a70d..51f51009a53f 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -1118,7 +1118,7 @@ static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst, break; default: break; - }; + } kvm_err("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", curr_pc, vcpu->arch.gprs[31], cache, op, base, arch->gprs[base], @@ -1183,7 +1183,7 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, KVM_TRACE_HWR(rd, sel), 0); goto unknown; - }; + } trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, KVM_TRACE_HWR(rd, sel), arch->gprs[rt]); @@ -1192,7 +1192,7 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, break; default: goto unknown; - }; + } break; unknown: @@ -1946,7 +1946,7 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu, default: *v = (long)kvm_read_c0_guest_prid(cop0); break; - }; + } break; case KVM_REG_MIPS_CP0_EBASE: *v = kvm_vz_read_gc0_ebase(); @@ -2185,7 +2185,7 @@ static 
int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu, default: kvm_write_c0_guest_prid(cop0, v); break; - }; + } break; case KVM_REG_MIPS_CP0_EBASE: kvm_vz_write_gc0_ebase(v); @@ -2980,7 +2980,7 @@ static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu) */ /* PageGrain */ - if (cpu_has_mips_r6) + if (cpu_has_mips_r5 || cpu_has_mips_r6) kvm_write_sw_gc0_pagegrain(cop0, PG_RIE | PG_XIE | PG_IEC); /* Wired */ if (cpu_has_mips_r6) @@ -2988,7 +2988,7 @@ static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu) read_gc0_wired() & MIPSR6_WIRED_LIMIT); /* Status */ kvm_write_sw_gc0_status(cop0, ST0_BEV | ST0_ERL); - if (cpu_has_mips_r6) + if (cpu_has_mips_r5 || cpu_has_mips_r6) kvm_change_sw_gc0_status(cop0, ST0_FR, read_gc0_status()); /* IntCtl */ kvm_write_sw_gc0_intctl(cop0, read_gc0_intctl() & @@ -3086,7 +3086,7 @@ static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu) } /* reset HTW registers */ - if (cpu_guest_has_htw && cpu_has_mips_r6) { + if (cpu_guest_has_htw && (cpu_has_mips_r5 || cpu_has_mips_r6)) { /* PWField */ kvm_write_sw_gc0_pwfield(cop0, 0x0c30c302); /* PWSize */ diff --git a/arch/mips/lantiq/Platform b/arch/mips/lantiq/Platform index b3ec49838fd7..0bc9c0fbd431 100644 --- a/arch/mips/lantiq/Platform +++ b/arch/mips/lantiq/Platform @@ -2,7 +2,6 @@ # Lantiq # -platform-$(CONFIG_LANTIQ) += lantiq/ cflags-$(CONFIG_LANTIQ) += -I$(srctree)/arch/mips/include/asm/mach-lantiq load-$(CONFIG_LANTIQ) = 0xffffffff80002000 cflags-$(CONFIG_SOC_TYPE_XWAY) += -I$(srctree)/arch/mips/include/asm/mach-lantiq/xway diff --git a/arch/mips/lasat/Kconfig b/arch/mips/lasat/Kconfig deleted file mode 100644 index 11b89e94b835..000000000000 --- a/arch/mips/lasat/Kconfig +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -config PICVUE - tristate "PICVUE LCD display driver" - depends on LASAT - -config PICVUE_PROC - tristate "PICVUE LCD display driver /proc interface" - depends on PICVUE && PROC_FS - -config DS1603 - bool "DS1603 RTC driver" - depends on LASAT - -config LASAT_SYSCTL - bool "LASAT sysctl interface" - depends on LASAT diff --git a/arch/mips/lasat/Makefile b/arch/mips/lasat/Makefile deleted file mode 100644 index 1789b227ef20..000000000000 --- a/arch/mips/lasat/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the LASAT specific kernel interface routines under Linux. 
-# - -obj-y += reset.o setup.o prom.o lasat_board.o \ - at93c.o interrupt.o serial.o - -obj-$(CONFIG_LASAT_SYSCTL) += sysctl.o -obj-$(CONFIG_DS1603) += ds1603.o -obj-$(CONFIG_PICVUE) += picvue.o -obj-$(CONFIG_PICVUE_PROC) += picvue_proc.o - -clean: - make -C image clean diff --git a/arch/mips/lasat/Platform b/arch/mips/lasat/Platform deleted file mode 100644 index 760252828bf1..000000000000 --- a/arch/mips/lasat/Platform +++ /dev/null @@ -1,7 +0,0 @@ -# -# LASAT platforms -# -platform-$(CONFIG_LASAT) += lasat/ -cflags-$(CONFIG_LASAT) += \ - -I$(srctree)/arch/mips/include/asm/mach-lasat -load-$(CONFIG_LASAT) += 0xffffffff80000000 diff --git a/arch/mips/lasat/at93c.c b/arch/mips/lasat/at93c.c deleted file mode 100644 index f895fe94b937..000000000000 --- a/arch/mips/lasat/at93c.c +++ /dev/null @@ -1,148 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Atmel AT93C46 serial eeprom driver - * - * Brian Murphy <brian.murphy@eicon.com> - * - */ -#include <linux/kernel.h> -#include <linux/delay.h> -#include <asm/lasat/lasat.h> - -#include "at93c.h" - -#define AT93C_ADDR_SHIFT 7 -#define AT93C_ADDR_MAX ((1 << AT93C_ADDR_SHIFT) - 1) -#define AT93C_RCMD (0x6 << AT93C_ADDR_SHIFT) -#define AT93C_WCMD (0x5 << AT93C_ADDR_SHIFT) -#define AT93C_WENCMD 0x260 -#define AT93C_WDSCMD 0x200 - -struct at93c_defs *at93c; - -static void at93c_reg_write(u32 val) -{ - *at93c->reg = val; -} - -static u32 at93c_reg_read(void) -{ - u32 tmp = *at93c->reg; - return tmp; -} - -static u32 at93c_datareg_read(void) -{ - u32 tmp = *at93c->rdata_reg; - return tmp; -} - -static void at93c_cycle_clk(u32 data) -{ - at93c_reg_write(data | at93c->clk); - lasat_ndelay(250); - at93c_reg_write(data & ~at93c->clk); - lasat_ndelay(250); -} - -static void at93c_write_databit(u8 bit) -{ - u32 data = at93c_reg_read(); - if (bit) - data |= 1 << at93c->wdata_shift; - else - data &= ~(1 << at93c->wdata_shift); - - at93c_reg_write(data); - lasat_ndelay(100); - at93c_cycle_clk(data); -} - -static unsigned int at93c_read_databit(void) -{ - u32 data; - - at93c_cycle_clk(at93c_reg_read()); - data = (at93c_datareg_read() >> at93c->rdata_shift) & 1; - return data; -} - -static u8 at93c_read_byte(void) -{ - int i; - u8 data = 0; - - for (i = 0; i <= 7; i++) { - data <<= 1; - data |= at93c_read_databit(); - } - return data; -} - -static void at93c_write_bits(u32 data, int size) -{ - int i; - int shift = size - 1; - u32 mask = (1 << shift); - - for (i = 0; i < size; i++) { - at93c_write_databit((data & mask) >> shift); - data <<= 1; - } -} - -static void at93c_init_op(void) -{ - at93c_reg_write((at93c_reg_read() | at93c->cs) & - ~at93c->clk & ~(1 << at93c->rdata_shift)); - lasat_ndelay(50); -} - -static void at93c_end_op(void) -{ - at93c_reg_write(at93c_reg_read() & ~at93c->cs); - lasat_ndelay(250); -} - -static void at93c_wait(void) -{ - at93c_init_op(); - while (!at93c_read_databit()) - ; - at93c_end_op(); -}; - -static void at93c_disable_wp(void) -{ - at93c_init_op(); - at93c_write_bits(AT93C_WENCMD, 10); - at93c_end_op(); -} - -static void at93c_enable_wp(void) -{ - at93c_init_op(); - at93c_write_bits(AT93C_WDSCMD, 10); - at93c_end_op(); -} - -u8 at93c_read(u8 addr) -{ - u8 byte; - at93c_init_op(); - at93c_write_bits((addr & AT93C_ADDR_MAX)|AT93C_RCMD, 10); - byte = at93c_read_byte(); - at93c_end_op(); - return byte; -} - -void at93c_write(u8 addr, u8 data) -{ - at93c_disable_wp(); - at93c_init_op(); - at93c_write_bits((addr & AT93C_ADDR_MAX)|AT93C_WCMD, 10); - at93c_write_bits(data, 8); - at93c_end_op(); - at93c_wait(); - at93c_enable_wp(); -} 
diff --git a/arch/mips/lasat/at93c.h b/arch/mips/lasat/at93c.h deleted file mode 100644 index 7a99a02d81d0..000000000000 --- a/arch/mips/lasat/at93c.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Atmel AT93C46 serial eeprom driver - * - * Brian Murphy <brian.murphy@eicon.com> - * - */ - -extern struct at93c_defs { - volatile u32 *reg; - volatile u32 *rdata_reg; - int rdata_shift; - int wdata_shift; - u32 cs; - u32 clk; -} *at93c; - -u8 at93c_read(u8 addr); -void at93c_write(u8 addr, u8 data); diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c deleted file mode 100644 index e6ce39fefa78..000000000000 --- a/arch/mips/lasat/ds1603.c +++ /dev/null @@ -1,190 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Dallas Semiconductors 1603 RTC driver - * - * Brian Murphy <brian@murphy.dk> - * - */ -#include <linux/kernel.h> -#include <asm/lasat/lasat.h> -#include <linux/delay.h> -#include <asm/lasat/ds1603.h> -#include <asm/time.h> - -#include "ds1603.h" - -#define READ_TIME_CMD 0x81 -#define SET_TIME_CMD 0x80 -#define TRIMMER_SET_CMD 0xC0 -#define TRIMMER_VALUE_MASK 0x38 -#define TRIMMER_SHIFT 3 - -struct ds_defs *ds1603; - -/* HW specific register functions */ -static void rtc_reg_write(unsigned long val) -{ - *ds1603->reg = val; -} - -static unsigned long rtc_reg_read(void) -{ - unsigned long tmp = *ds1603->reg; - return tmp; -} - -static unsigned long rtc_datareg_read(void) -{ - unsigned long tmp = *ds1603->data_reg; - return tmp; -} - -static void rtc_nrst_high(void) -{ - rtc_reg_write(rtc_reg_read() | ds1603->rst); -} - -static void rtc_nrst_low(void) -{ - rtc_reg_write(rtc_reg_read() & ~ds1603->rst); -} - -static void rtc_cycle_clock(unsigned long data) -{ - data |= ds1603->clk; - rtc_reg_write(data); - lasat_ndelay(250); - if (ds1603->data_reversed) - data &= ~ds1603->data; - else - data |= ds1603->data; - data &= ~ds1603->clk; - rtc_reg_write(data); - lasat_ndelay(250 + ds1603->huge_delay); -} - -static void rtc_write_databit(unsigned int bit) -{ - unsigned long data = rtc_reg_read(); - if (ds1603->data_reversed) - bit = !bit; - if (bit) - data |= ds1603->data; - else - data &= ~ds1603->data; - - rtc_reg_write(data); - lasat_ndelay(50 + ds1603->huge_delay); - rtc_cycle_clock(data); -} - -static unsigned int rtc_read_databit(void) -{ - unsigned int data; - - data = (rtc_datareg_read() & (1 << ds1603->data_read_shift)) - >> ds1603->data_read_shift; - rtc_cycle_clock(rtc_reg_read()); - return data; -} - -static void rtc_write_byte(unsigned int byte) -{ - int i; - - for (i = 0; i <= 7; i++) { - rtc_write_databit(byte & 1L); - byte >>= 1; - } -} - -static void rtc_write_word(unsigned long word) -{ - int i; - - for (i = 0; i <= 31; i++) { - rtc_write_databit(word & 1L); - word >>= 1; - } -} - -static unsigned long rtc_read_word(void) -{ - int i; - unsigned long word = 0; - unsigned long shift = 0; - - for (i = 0; i <= 31; i++) { - word |= rtc_read_databit() << shift; - shift++; - } - return word; -} - -static void rtc_init_op(void) -{ - rtc_nrst_high(); - - rtc_reg_write(rtc_reg_read() & ~ds1603->clk); - - lasat_ndelay(50); -} - -static void rtc_end_op(void) -{ - rtc_nrst_low(); - lasat_ndelay(1000); -} - -void read_persistent_clock64(struct timespec64 *ts) -{ - unsigned long word; - unsigned long flags; - - spin_lock_irqsave(&rtc_lock, flags); - rtc_init_op(); - rtc_write_byte(READ_TIME_CMD); - word = rtc_read_word(); - rtc_end_op(); - spin_unlock_irqrestore(&rtc_lock, flags); - - ts->tv_sec = word; - ts->tv_nsec = 0; -} - -int 
update_persistent_clock64(struct timespec64 now) -{ - time64_t time = now.tv_sec; - unsigned long flags; - - spin_lock_irqsave(&rtc_lock, flags); - rtc_init_op(); - rtc_write_byte(SET_TIME_CMD); - /* - * Due to the hardware limitation, we cast to 'unsigned long' type, - * so it will overflow in year 2106 on 32-bit machine. - */ - rtc_write_word((unsigned long)time); - rtc_end_op(); - spin_unlock_irqrestore(&rtc_lock, flags); - - return 0; -} - -void ds1603_set_trimmer(unsigned int trimval) -{ - rtc_init_op(); - rtc_write_byte(((trimval << TRIMMER_SHIFT) & TRIMMER_VALUE_MASK) - | (TRIMMER_SET_CMD)); - rtc_end_op(); -} - -void ds1603_disable(void) -{ - ds1603_set_trimmer(TRIMMER_DISABLE_RTC); -} - -void ds1603_enable(void) -{ - ds1603_set_trimmer(TRIMMER_DEFAULT); -} diff --git a/arch/mips/lasat/ds1603.h b/arch/mips/lasat/ds1603.h deleted file mode 100644 index 00987d3bdc21..000000000000 --- a/arch/mips/lasat/ds1603.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Dallas Semiconductors 1603 RTC driver - * - * Brian Murphy <brian@murphy.dk> - * - */ -#ifndef __DS1603_H -#define __DS1603_H - -struct ds_defs { - volatile u32 *reg; - volatile u32 *data_reg; - u32 rst; - u32 clk; - u32 data; - u32 data_read_shift; - char data_reversed; - u32 huge_delay; -}; - -extern struct ds_defs *ds1603; - -void ds1603_set_trimmer(unsigned int); -void ds1603_enable(void); -void ds1603_disable(void); -void ds1603_init(struct ds_defs *); - -#define TRIMMER_DEFAULT 3 -#define TRIMMER_DISABLE_RTC 0 - -#endif diff --git a/arch/mips/lasat/image/Makefile b/arch/mips/lasat/image/Makefile deleted file mode 100644 index 78ce4cff1012..000000000000 --- a/arch/mips/lasat/image/Makefile +++ /dev/null @@ -1,53 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# MAKEFILE FOR THE MIPS LINUX BOOTLOADER AND ROM DEBUGGER -# -# i-data Networks -# -# Author: Thomas Horsten <thh@i-data.com> -# - -ifndef Version - Version = "$(USER)-test" -endif - -MKLASATIMG = mklasatimg -MKLASATIMG_ARCH = mq2,mqpro,sp100,sp200 -KERNEL_IMAGE = vmlinux - -LDSCRIPT= -L$(srctree)/$(src) -Tromscript.normal - -HEAD_DEFINES := -D_kernel_start=$(VMLINUX_LOAD_ADDRESS) \ - -D_kernel_entry=$(VMLINUX_ENTRY_ADDRESS) \ - -D VERSION="\"$(Version)\"" \ - -D TIMESTAMP=$(shell date +%s) - -$(obj)/head.o: $(obj)/head.S $(KERNEL_IMAGE) - $(CC) -fno-pic $(HEAD_DEFINES) $(LINUXINCLUDE) -c -o $@ $< - -OBJECTS = head.o kImage.o - -rom.sw: $(obj)/rom.sw -rom.bin: $(obj)/rom.bin - -$(obj)/rom.sw: $(obj)/rom.bin - $(MKLASATIMG) -o $@ -k $^ -m $(MKLASATIMG_ARCH) - -$(obj)/rom.bin: $(obj)/rom - $(OBJCOPY) -O binary -S $^ $@ - -# Rule to make the bootloader -$(obj)/rom: $(addprefix $(obj)/,$(OBJECTS)) - $(LD) $(KBUILD_LDFLAGS) $(LDSCRIPT) -o $@ $^ - -$(obj)/%.o: $(obj)/%.gz - $(LD) -r -o $@ -b binary $< - -$(obj)/%.gz: $(obj)/%.bin - gzip -cf -9 $< > $@ - -$(obj)/kImage.bin: $(KERNEL_IMAGE) - $(OBJCOPY) -O binary -S $^ $@ - -clean: - rm -f rom rom.bin rom.sw kImage.bin kImage.o diff --git a/arch/mips/lasat/image/head.S b/arch/mips/lasat/image/head.S deleted file mode 100644 index 1a27312d4c2e..000000000000 --- a/arch/mips/lasat/image/head.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <asm/lasat/head.h> - - .text - .section .text..start, "ax" - .set noreorder - .set mips3 - - /* Magic words identifying a software image */ - .word LASAT_K_MAGIC0_VAL - .word LASAT_K_MAGIC1_VAL - - /* Image header version */ - .word 0x00000002 - - /* image start and size */ - .word _image_start - .word _image_size - - /* start of 
kernel and entrypoint in uncompressed image */ - .word _kernel_start - .word _kernel_entry - - /* Here we have room for future flags */ - - .org 0x40 -reldate: - .word TIMESTAMP - - .org 0x50 -release: - .string VERSION diff --git a/arch/mips/lasat/image/romscript.normal b/arch/mips/lasat/image/romscript.normal deleted file mode 100644 index 0864c963e188..000000000000 --- a/arch/mips/lasat/image/romscript.normal +++ /dev/null @@ -1,23 +0,0 @@ -OUTPUT_ARCH(mips) - -SECTIONS -{ - .text : - { - *(.text..start) - } - - /* Data in ROM */ - - .data ALIGN(0x10) : - { - *(.data) - } - _image_start = ADDR(.data); - _image_size = SIZEOF(.data); - - .other : - { - *(.*) - } -} diff --git a/arch/mips/lasat/interrupt.c b/arch/mips/lasat/interrupt.c deleted file mode 100644 index 7965bbd0d319..000000000000 --- a/arch/mips/lasat/interrupt.c +++ /dev/null @@ -1,119 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * Routines for generic manipulation of the interrupts found on the - * Lasat boards. - */ -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/irq.h> - -#include <asm/irq_cpu.h> -#include <asm/lasat/lasat.h> -#include <asm/lasat/lasatint.h> - -#include <irq.h> - -static volatile int *lasat_int_status; -static volatile int *lasat_int_mask; -static volatile int lasat_int_mask_shift; - -void disable_lasat_irq(struct irq_data *d) -{ - unsigned int irq_nr = d->irq - LASAT_IRQ_BASE; - - *lasat_int_mask &= ~(1 << irq_nr) << lasat_int_mask_shift; -} - -void enable_lasat_irq(struct irq_data *d) -{ - unsigned int irq_nr = d->irq - LASAT_IRQ_BASE; - - *lasat_int_mask |= (1 << irq_nr) << lasat_int_mask_shift; -} - -static struct irq_chip lasat_irq_type = { - .name = "Lasat", - .irq_mask = disable_lasat_irq, - .irq_unmask = enable_lasat_irq, -}; - -static inline int ls1bit32(unsigned int x) -{ - int b = 31, s; - - s = 16; if (x << 16 == 0) s = 0; b -= s; x <<= s; - s = 8; if (x << 8 == 0) s = 0; b -= s; x <<= s; - s = 4; if (x << 4 == 0) s = 0; b -= s; x <<= s; - s = 2; if (x << 2 == 0) s = 0; b -= s; x <<= s; - s = 1; if (x << 1 == 0) s = 0; b -= s; - - return b; -} - -static unsigned long (*get_int_status)(void); - -static unsigned long get_int_status_100(void) -{ - return *lasat_int_status & *lasat_int_mask; -} - -static unsigned long get_int_status_200(void) -{ - unsigned long int_status; - - int_status = *lasat_int_status; - int_status &= (int_status >> LASATINT_MASK_SHIFT_200) & 0xffff; - return int_status; -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned long int_status; - unsigned int cause = read_c0_cause(); - int irq; - - if (cause & CAUSEF_IP7) { /* R4000 count / compare IRQ */ - do_IRQ(7); - return; - } - - int_status = get_int_status(); - - /* if int_status == 0, then the interrupt has already been cleared */ - if (int_status) { - irq = LASAT_IRQ_BASE + ls1bit32(int_status); - - do_IRQ(irq); - } -} - -void __init arch_init_irq(void) -{ - int irq = LASAT_CASCADE_IRQ; - int i; - - if (IS_LASAT_200()) { - lasat_int_status = (void *)LASAT_INT_STATUS_REG_200; - lasat_int_mask = (void *)LASAT_INT_MASK_REG_200; - lasat_int_mask_shift = LASATINT_MASK_SHIFT_200; - get_int_status = get_int_status_200; - *lasat_int_mask &= 0xffff; - } else { - lasat_int_status = (void *)LASAT_INT_STATUS_REG_100; - lasat_int_mask = (void *)LASAT_INT_MASK_REG_100; - lasat_int_mask_shift = LASATINT_MASK_SHIFT_100; - get_int_status = get_int_status_100; - *lasat_int_mask = 0; 
- } - - mips_cpu_irq_init(); - - for (i = LASAT_IRQ_BASE; i <= LASAT_IRQ_END; i++) - irq_set_chip_and_handler(i, &lasat_irq_type, handle_level_irq); - - if (request_irq(irq, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", irq); -} diff --git a/arch/mips/lasat/lasat_board.c b/arch/mips/lasat/lasat_board.c deleted file mode 100644 index 80e1ba541148..000000000000 --- a/arch/mips/lasat/lasat_board.c +++ /dev/null @@ -1,268 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Thomas Horsten <thh@lasat.com> - * Copyright (C) 2000 LASAT Networks A/S. - * - * Routines specific to the LASAT boards - */ -#include <linux/types.h> -#include <linux/crc32.h> -#include <asm/lasat/lasat.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <linux/mutex.h> -#include <asm/addrspace.h> -#include "at93c.h" -/* New model description table */ -#include "lasat_models.h" - -static DEFINE_MUTEX(lasat_eeprom_mutex); - -#define EEPROM_CRC(data, len) (~crc32(~0, data, len)) - -struct lasat_info lasat_board_info; - -int EEPROMRead(unsigned int pos, unsigned char *data, int len) -{ - int i; - - for (i = 0; i < len; i++) - *data++ = at93c_read(pos++); - - return 0; -} - -int EEPROMWrite(unsigned int pos, unsigned char *data, int len) -{ - int i; - - for (i = 0; i < len; i++) - at93c_write(pos++, *data++); - - return 0; -} - -static void init_flash_sizes(void) -{ - unsigned long *lb = lasat_board_info.li_flashpart_base; - unsigned long *ls = lasat_board_info.li_flashpart_size; - int i; - - ls[LASAT_MTD_BOOTLOADER] = 0x40000; - ls[LASAT_MTD_SERVICE] = 0xC0000; - ls[LASAT_MTD_NORMAL] = 0x100000; - - if (!IS_LASAT_200()) { - lasat_board_info.li_flash_base = 0x1e000000; - - lb[LASAT_MTD_BOOTLOADER] = 0x1e400000; - - if (lasat_board_info.li_flash_size > 0x200000) { - ls[LASAT_MTD_CONFIG] = 0x100000; - ls[LASAT_MTD_FS] = 0x500000; - } - } else { - lasat_board_info.li_flash_base = 0x10000000; - - if (lasat_board_info.li_flash_size < 0x1000000) { - lb[LASAT_MTD_BOOTLOADER] = 0x10000000; - ls[LASAT_MTD_CONFIG] = 0x100000; - if (lasat_board_info.li_flash_size >= 0x400000) - ls[LASAT_MTD_FS] = - lasat_board_info.li_flash_size - 0x300000; - } - } - - for (i = 1; i < LASAT_MTD_LAST; i++) - lb[i] = lb[i-1] + ls[i-1]; -} - -int lasat_init_board_info(void) -{ - int c; - unsigned long crc; - unsigned long cfg0, cfg1; - const struct product_info *ppi; - int i_n_base_models = N_BASE_MODELS; - const char * const * i_txt_base_models = txt_base_models; - int i_n_prids = N_PRIDS; - - memset(&lasat_board_info, 0, sizeof(lasat_board_info)); - - /* First read the EEPROM info */ - EEPROMRead(0, (unsigned char *)&lasat_board_info.li_eeprom_info, - sizeof(struct lasat_eeprom_struct)); - - /* Check the CRC */ - crc = EEPROM_CRC((unsigned char *)(&lasat_board_info.li_eeprom_info), - sizeof(struct lasat_eeprom_struct) - 4); - - if (crc != lasat_board_info.li_eeprom_info.crc32) { - printk(KERN_WARNING "WARNING...\nWARNING...\nEEPROM CRC does " - "not match calculated, attempting to soldier on...\n"); - } - - if (lasat_board_info.li_eeprom_info.version != LASAT_EEPROM_VERSION) { - printk(KERN_WARNING "WARNING...\nWARNING...\nEEPROM version " - "%d, wanted version %d, attempting to soldier on...\n", - (unsigned int)lasat_board_info.li_eeprom_info.version, - LASAT_EEPROM_VERSION); - } - - cfg0 = lasat_board_info.li_eeprom_info.cfg[0]; - cfg1 = lasat_board_info.li_eeprom_info.cfg[1]; - - if (LASAT_W0_DSCTYPE(cfg0) != 1) { - printk(KERN_WARNING 
"WARNING...\nWARNING...\n" - "Invalid configuration read from EEPROM, attempting to " - "soldier on..."); - } - /* We have a valid configuration */ - - switch (LASAT_W0_SDRAMBANKSZ(cfg0)) { - case 0: - lasat_board_info.li_memsize = 0x0800000; - break; - case 1: - lasat_board_info.li_memsize = 0x1000000; - break; - case 2: - lasat_board_info.li_memsize = 0x2000000; - break; - case 3: - lasat_board_info.li_memsize = 0x4000000; - break; - case 4: - lasat_board_info.li_memsize = 0x8000000; - break; - default: - lasat_board_info.li_memsize = 0; - } - - switch (LASAT_W0_SDRAMBANKS(cfg0)) { - case 0: - break; - case 1: - lasat_board_info.li_memsize *= 2; - break; - default: - break; - } - - switch (LASAT_W0_BUSSPEED(cfg0)) { - case 0x0: - lasat_board_info.li_bus_hz = 60000000; - break; - case 0x1: - lasat_board_info.li_bus_hz = 66000000; - break; - case 0x2: - lasat_board_info.li_bus_hz = 66666667; - break; - case 0x3: - lasat_board_info.li_bus_hz = 80000000; - break; - case 0x4: - lasat_board_info.li_bus_hz = 83333333; - break; - case 0x5: - lasat_board_info.li_bus_hz = 100000000; - break; - } - - switch (LASAT_W0_CPUCLK(cfg0)) { - case 0x0: - lasat_board_info.li_cpu_hz = - lasat_board_info.li_bus_hz; - break; - case 0x1: - lasat_board_info.li_cpu_hz = - lasat_board_info.li_bus_hz + - (lasat_board_info.li_bus_hz >> 1); - break; - case 0x2: - lasat_board_info.li_cpu_hz = - lasat_board_info.li_bus_hz + - lasat_board_info.li_bus_hz; - break; - case 0x3: - lasat_board_info.li_cpu_hz = - lasat_board_info.li_bus_hz + - lasat_board_info.li_bus_hz + - (lasat_board_info.li_bus_hz >> 1); - break; - case 0x4: - lasat_board_info.li_cpu_hz = - lasat_board_info.li_bus_hz + - lasat_board_info.li_bus_hz + - lasat_board_info.li_bus_hz; - break; - } - - /* Flash size */ - switch (LASAT_W1_FLASHSIZE(cfg1)) { - case 0: - lasat_board_info.li_flash_size = 0x200000; - break; - case 1: - lasat_board_info.li_flash_size = 0x400000; - break; - case 2: - lasat_board_info.li_flash_size = 0x800000; - break; - case 3: - lasat_board_info.li_flash_size = 0x1000000; - break; - case 4: - lasat_board_info.li_flash_size = 0x2000000; - break; - } - - init_flash_sizes(); - - lasat_board_info.li_bmid = LASAT_W0_BMID(cfg0); - lasat_board_info.li_prid = lasat_board_info.li_eeprom_info.prid; - if (lasat_board_info.li_prid == 0xffff || lasat_board_info.li_prid == 0) - lasat_board_info.li_prid = lasat_board_info.li_bmid; - - /* Base model stuff */ - if (lasat_board_info.li_bmid > i_n_base_models) - lasat_board_info.li_bmid = i_n_base_models; - strcpy(lasat_board_info.li_bmstr, - i_txt_base_models[lasat_board_info.li_bmid]); - - /* Product ID dependent values */ - c = lasat_board_info.li_prid; - if (c >= i_n_prids) { - strcpy(lasat_board_info.li_namestr, "Unknown Model"); - strcpy(lasat_board_info.li_typestr, "Unknown Type"); - } else { - ppi = &vendor_info_table[0].vi_product_info[c]; - strcpy(lasat_board_info.li_namestr, ppi->pi_name); - if (ppi->pi_type) - strcpy(lasat_board_info.li_typestr, ppi->pi_type); - else - sprintf(lasat_board_info.li_typestr, "%d", 10 * c); - } - - return 0; -} - -void lasat_write_eeprom_info(void) -{ - unsigned long crc; - - mutex_lock(&lasat_eeprom_mutex); - - /* Generate the CRC */ - crc = EEPROM_CRC((unsigned char *)(&lasat_board_info.li_eeprom_info), - sizeof(struct lasat_eeprom_struct) - 4); - lasat_board_info.li_eeprom_info.crc32 = crc; - - /* Write the EEPROM info */ - EEPROMWrite(0, (unsigned char *)&lasat_board_info.li_eeprom_info, - sizeof(struct lasat_eeprom_struct)); - - 
mutex_unlock(&lasat_eeprom_mutex); -} diff --git a/arch/mips/lasat/lasat_models.h b/arch/mips/lasat/lasat_models.h deleted file mode 100644 index 474e57342484..000000000000 --- a/arch/mips/lasat/lasat_models.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Model description tables - */ -#include <linux/kernel.h> - -struct product_info { - const char *pi_name; - const char *pi_type; -}; - -struct vendor_info { - const char *vi_name; - const struct product_info *vi_product_info; -}; - -/* - * Base models - */ -static const char * const txt_base_models[] = { - "MQ 2", "MQ Pro", "SP 25", "SP 50", "SP 100", "SP 5000", "SP 7000", - "SP 1000", "Unknown" -}; -#define N_BASE_MODELS (ARRAY_SIZE(txt_base_models) - 1) - -/* - * Eicon Networks - */ -static const char txt_en_mq[] = "Masquerade"; -static const char txt_en_sp[] = "Safepipe"; - -static const struct product_info product_info_eicon[] = { - { txt_en_mq, "II" }, /* 0 */ - { txt_en_mq, "Pro" }, /* 1 */ - { txt_en_sp, "25" }, /* 2 */ - { txt_en_sp, "50" }, /* 3 */ - { txt_en_sp, "100" }, /* 4 */ - { txt_en_sp, "5000" }, /* 5 */ - { txt_en_sp, "7000" }, /* 6 */ - { txt_en_sp, "30" }, /* 7 */ - { txt_en_sp, "5100" }, /* 8 */ - { txt_en_sp, "7100" }, /* 9 */ - { txt_en_sp, "1110" }, /* 10 */ - { txt_en_sp, "3020" }, /* 11 */ - { txt_en_sp, "3030" }, /* 12 */ - { txt_en_sp, "5020" }, /* 13 */ - { txt_en_sp, "5030" }, /* 14 */ - { txt_en_sp, "1120" }, /* 15 */ - { txt_en_sp, "1130" }, /* 16 */ - { txt_en_sp, "6010" }, /* 17 */ - { txt_en_sp, "6110" }, /* 18 */ - { txt_en_sp, "6210" }, /* 19 */ - { txt_en_sp, "1020" }, /* 20 */ - { txt_en_sp, "1040" }, /* 21 */ - { txt_en_sp, "1050" }, /* 22 */ - { txt_en_sp, "1060" }, /* 23 */ -}; - -#define N_PRIDS ARRAY_SIZE(product_info_eicon) - -/* - * The vendor table - */ -static struct vendor_info const vendor_info_table[] = { - { "Eicon Networks", product_info_eicon }, -}; - -#define N_VENDORS ARRAY_SIZE(vendor_info_table) diff --git a/arch/mips/lasat/picvue.c b/arch/mips/lasat/picvue.c deleted file mode 100644 index 08298ccf5ccf..000000000000 --- a/arch/mips/lasat/picvue.c +++ /dev/null @@ -1,242 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Picvue PVC160206 display driver - * - * Brian Murphy <brian@murphy.dk> - * - */ -#include <linux/kernel.h> -#include <linux/delay.h> -#include <asm/bootinfo.h> -#include <asm/lasat/lasat.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/string.h> - -#include "picvue.h" - -#define PVC_BUSY 0x80 -#define PVC_NLINES 2 -#define PVC_DISPMEM 80 -#define PVC_LINELEN PVC_DISPMEM / PVC_NLINES - -struct pvc_defs *picvue; - -static void pvc_reg_write(u32 val) -{ - *picvue->reg = val; -} - -static u32 pvc_reg_read(void) -{ - u32 tmp = *picvue->reg; - return tmp; -} - -static void pvc_write_byte(u32 data, u8 byte) -{ - data |= picvue->e; - pvc_reg_write(data); - data &= ~picvue->data_mask; - data |= byte << picvue->data_shift; - pvc_reg_write(data); - ndelay(220); - pvc_reg_write(data & ~picvue->e); - ndelay(220); -} - -static u8 pvc_read_byte(u32 data) -{ - u8 byte; - - data |= picvue->e; - pvc_reg_write(data); - ndelay(220); - byte = (pvc_reg_read() & picvue->data_mask) >> picvue->data_shift; - data &= ~picvue->e; - pvc_reg_write(data); - ndelay(220); - return byte; -} - -static u8 pvc_read_data(void) -{ - u32 data = pvc_reg_read(); - u8 byte; - data |= picvue->rw; - data &= ~picvue->rs; - pvc_reg_write(data); - ndelay(40); - byte = pvc_read_byte(data); - data |= picvue->rs; - pvc_reg_write(data); - return byte; -} 
- -#define TIMEOUT 1000 -static int pvc_wait(void) -{ - int i = TIMEOUT; - int err = 0; - - while ((pvc_read_data() & PVC_BUSY) && i) - i--; - if (i == 0) - err = -ETIME; - - return err; -} - -#define MODE_INST 0 -#define MODE_DATA 1 -static void pvc_write(u8 byte, int mode) -{ - u32 data = pvc_reg_read(); - data &= ~picvue->rw; - if (mode == MODE_DATA) - data |= picvue->rs; - else - data &= ~picvue->rs; - pvc_reg_write(data); - ndelay(40); - pvc_write_byte(data, byte); - if (mode == MODE_DATA) - data &= ~picvue->rs; - else - data |= picvue->rs; - pvc_reg_write(data); - pvc_wait(); -} - -void pvc_write_string(const unsigned char *str, u8 addr, int line) -{ - int i = 0; - - if (line > 0 && (PVC_NLINES > 1)) - addr += 0x40 * line; - pvc_write(0x80 | addr, MODE_INST); - - while (*str != 0 && i < PVC_LINELEN) { - pvc_write(*str++, MODE_DATA); - i++; - } -} - -void pvc_write_string_centered(const unsigned char *str, int line) -{ - int len = strlen(str); - u8 addr; - - if (len > PVC_VISIBLE_CHARS) - addr = 0; - else - addr = (PVC_VISIBLE_CHARS - strlen(str))/2; - - pvc_write_string(str, addr, line); -} - -void pvc_dump_string(const unsigned char *str) -{ - int len = strlen(str); - - pvc_write_string(str, 0, 0); - if (len > PVC_VISIBLE_CHARS) - pvc_write_string(&str[PVC_VISIBLE_CHARS], 0, 1); -} - -#define BM_SIZE 8 -#define MAX_PROGRAMMABLE_CHARS 8 -int pvc_program_cg(int charnum, u8 bitmap[BM_SIZE]) -{ - int i; - int addr; - - if (charnum > MAX_PROGRAMMABLE_CHARS) - return -ENOENT; - - addr = charnum * 8; - pvc_write(0x40 | addr, MODE_INST); - - for (i = 0; i < BM_SIZE; i++) - pvc_write(bitmap[i], MODE_DATA); - return 0; -} - -#define FUNC_SET_CMD 0x20 -#define EIGHT_BYTE (1 << 4) -#define FOUR_BYTE 0 -#define TWO_LINES (1 << 3) -#define ONE_LINE 0 -#define LARGE_FONT (1 << 2) -#define SMALL_FONT 0 - -static void pvc_funcset(u8 cmd) -{ - pvc_write(FUNC_SET_CMD | (cmd & (EIGHT_BYTE|TWO_LINES|LARGE_FONT)), - MODE_INST); -} - -#define ENTRYMODE_CMD 0x4 -#define AUTO_INC (1 << 1) -#define AUTO_DEC 0 -#define CURSOR_FOLLOWS_DISP (1 << 0) - -static void pvc_entrymode(u8 cmd) -{ - pvc_write(ENTRYMODE_CMD | (cmd & (AUTO_INC|CURSOR_FOLLOWS_DISP)), - MODE_INST); -} - -#define DISP_CNT_CMD 0x08 -#define DISP_OFF 0 -#define DISP_ON (1 << 2) -#define CUR_ON (1 << 1) -#define CUR_BLINK (1 << 0) -void pvc_dispcnt(u8 cmd) -{ - pvc_write(DISP_CNT_CMD | (cmd & (DISP_ON|CUR_ON|CUR_BLINK)), MODE_INST); -} - -#define MOVE_CMD 0x10 -#define DISPLAY (1 << 3) -#define CURSOR 0 -#define RIGHT (1 << 2) -#define LEFT 0 -void pvc_move(u8 cmd) -{ - pvc_write(MOVE_CMD | (cmd & (DISPLAY|RIGHT)), MODE_INST); -} - -#define CLEAR_CMD 0x1 -void pvc_clear(void) -{ - pvc_write(CLEAR_CMD, MODE_INST); -} - -#define HOME_CMD 0x2 -void pvc_home(void) -{ - pvc_write(HOME_CMD, MODE_INST); -} - -int pvc_init(void) -{ - u8 cmd = EIGHT_BYTE; - - if (PVC_NLINES == 2) - cmd |= (SMALL_FONT|TWO_LINES); - else - cmd |= (LARGE_FONT|ONE_LINE); - pvc_funcset(cmd); - pvc_dispcnt(DISP_ON); - pvc_entrymode(AUTO_INC); - - pvc_clear(); - pvc_write_string_centered("Display", 0); - pvc_write_string_centered("Initialized", 1); - - return 0; -} - -module_init(pvc_init); -MODULE_LICENSE("GPL"); diff --git a/arch/mips/lasat/picvue.h b/arch/mips/lasat/picvue.h deleted file mode 100644 index 161d3bf50811..000000000000 --- a/arch/mips/lasat/picvue.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Picvue PVC160206 display driver - * - * Brian Murphy <brian.murphy@eicon.com> - * - */ -struct pvc_defs { - volatile u32 *reg; - u32 
data_shift; - u32 data_mask; - u32 e; - u32 rw; - u32 rs; -}; - -extern struct pvc_defs *picvue; - -#define PVC_NLINES 2 -#define PVC_DISPMEM 80 -#define PVC_LINELEN PVC_DISPMEM / PVC_NLINES -#define PVC_VISIBLE_CHARS 16 - -void pvc_write_string(const unsigned char *str, u8 addr, int line); -void pvc_write_string_centered(const unsigned char *str, int line); -void pvc_dump_string(const unsigned char *str); - -#define BM_SIZE 8 -#define MAX_PROGRAMMABLE_CHARS 8 -int pvc_program_cg(int charnum, u8 bitmap[BM_SIZE]); - -void pvc_dispcnt(u8 cmd); -#define DISP_OFF 0 -#define DISP_ON (1 << 2) -#define CUR_ON (1 << 1) -#define CUR_BLINK (1 << 0) - -void pvc_move(u8 cmd); -#define DISPLAY (1 << 3) -#define CURSOR 0 -#define RIGHT (1 << 2) -#define LEFT 0 - -void pvc_clear(void); -void pvc_home(void); diff --git a/arch/mips/lasat/picvue_proc.c b/arch/mips/lasat/picvue_proc.c deleted file mode 100644 index 61c033494af5..000000000000 --- a/arch/mips/lasat/picvue_proc.c +++ /dev/null @@ -1,208 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Picvue PVC160206 display driver - * - * Brian Murphy <brian.murphy@eicon.com> - * - */ -#include <linux/bug.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/errno.h> - -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/interrupt.h> - -#include <linux/timer.h> -#include <linux/mutex.h> -#include <linux/uaccess.h> - -#include "picvue.h" - -static DEFINE_MUTEX(pvc_mutex); -static char pvc_lines[PVC_NLINES][PVC_LINELEN+1]; -static int pvc_linedata[PVC_NLINES]; -static char *pvc_linename[PVC_NLINES] = {"line1", "line2"}; -#define DISPLAY_DIR_NAME "display" -static int scroll_dir, scroll_interval; - -static struct timer_list timer; - -static void pvc_display(unsigned long data) -{ - int i; - - pvc_clear(); - for (i = 0; i < PVC_NLINES; i++) - pvc_write_string(pvc_lines[i], 0, i); -} - -static DECLARE_TASKLET(pvc_display_tasklet, &pvc_display, 0); - -static int pvc_line_proc_show(struct seq_file *m, void *v) -{ - int lineno = *(int *)m->private; - - if (lineno < 0 || lineno >= PVC_NLINES) { - printk(KERN_WARNING "proc_read_line: invalid lineno %d\n", lineno); - return 0; - } - - mutex_lock(&pvc_mutex); - seq_printf(m, "%s\n", pvc_lines[lineno]); - mutex_unlock(&pvc_mutex); - - return 0; -} - -static int pvc_line_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, pvc_line_proc_show, PDE_DATA(inode)); -} - -static ssize_t pvc_line_proc_write(struct file *file, const char __user *buf, - size_t count, loff_t *pos) -{ - int lineno = *(int *)PDE_DATA(file_inode(file)); - char kbuf[PVC_LINELEN]; - size_t len; - - BUG_ON(lineno < 0 || lineno >= PVC_NLINES); - - len = min(count, sizeof(kbuf) - 1); - if (copy_from_user(kbuf, buf, len)) - return -EFAULT; - kbuf[len] = '\0'; - - if (len > 0 && kbuf[len - 1] == '\n') - len--; - - mutex_lock(&pvc_mutex); - strncpy(pvc_lines[lineno], kbuf, len); - pvc_lines[lineno][len] = '\0'; - mutex_unlock(&pvc_mutex); - - tasklet_schedule(&pvc_display_tasklet); - - return count; -} - -static const struct proc_ops pvc_line_proc_ops = { - .proc_open = pvc_line_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, - .proc_write = pvc_line_proc_write, -}; - -static ssize_t pvc_scroll_proc_write(struct file *file, const char __user *buf, - size_t count, loff_t *pos) -{ - char kbuf[42]; - size_t len; - int cmd; - - len = min(count, sizeof(kbuf) - 1); - if (copy_from_user(kbuf, buf, len)) - return 
-EFAULT; - kbuf[len] = '\0'; - - cmd = simple_strtol(kbuf, NULL, 10); - - mutex_lock(&pvc_mutex); - if (scroll_interval != 0) - del_timer(&timer); - - if (cmd == 0) { - scroll_dir = 0; - scroll_interval = 0; - } else { - if (cmd < 0) { - scroll_dir = -1; - scroll_interval = -cmd; - } else { - scroll_dir = 1; - scroll_interval = cmd; - } - add_timer(&timer); - } - mutex_unlock(&pvc_mutex); - - return count; -} - -static int pvc_scroll_proc_show(struct seq_file *m, void *v) -{ - mutex_lock(&pvc_mutex); - seq_printf(m, "%d\n", scroll_dir * scroll_interval); - mutex_unlock(&pvc_mutex); - - return 0; -} - -static int pvc_scroll_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, pvc_scroll_proc_show, NULL); -} - -static const struct proc_ops pvc_scroll_proc_ops = { - .proc_open = pvc_scroll_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, - .proc_write = pvc_scroll_proc_write, -}; - -void pvc_proc_timerfunc(struct timer_list *unused) -{ - if (scroll_dir < 0) - pvc_move(DISPLAY|RIGHT); - else if (scroll_dir > 0) - pvc_move(DISPLAY|LEFT); - - timer.expires = jiffies + scroll_interval; - add_timer(&timer); -} - -static void pvc_proc_cleanup(void) -{ - remove_proc_subtree(DISPLAY_DIR_NAME, NULL); - del_timer_sync(&timer); -} - -static int __init pvc_proc_init(void) -{ - struct proc_dir_entry *dir, *proc_entry; - int i; - - dir = proc_mkdir(DISPLAY_DIR_NAME, NULL); - if (dir == NULL) - goto error; - - for (i = 0; i < PVC_NLINES; i++) { - strcpy(pvc_lines[i], ""); - pvc_linedata[i] = i; - } - for (i = 0; i < PVC_NLINES; i++) { - proc_entry = proc_create_data(pvc_linename[i], 0644, dir, - &pvc_line_proc_ops, &pvc_linedata[i]); - if (proc_entry == NULL) - goto error; - } - proc_entry = proc_create("scroll", 0644, dir, &pvc_scroll_proc_ops); - if (proc_entry == NULL) - goto error; - - timer_setup(&timer, pvc_proc_timerfunc, 0); - - return 0; -error: - pvc_proc_cleanup(); - return -ENOMEM; -} - -module_init(pvc_proc_init); -module_exit(pvc_proc_cleanup); -MODULE_LICENSE("GPL"); diff --git a/arch/mips/lasat/prom.c b/arch/mips/lasat/prom.c deleted file mode 100644 index 5ce1407de2d5..000000000000 --- a/arch/mips/lasat/prom.c +++ /dev/null @@ -1,126 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * PROM interface routines. 
- */ -#include <linux/types.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/memblock.h> -#include <linux/ioport.h> -#include <asm/bootinfo.h> -#include <asm/lasat/lasat.h> -#include <asm/cpu.h> -#include <asm/setup.h> - -#include "at93c.h" -#include <asm/lasat/eeprom.h> -#include "prom.h" - -#define RESET_VECTOR 0xbfc00000 -#define PROM_JUMP_TABLE_ENTRY(n) (*((u32 *)(RESET_VECTOR + 0x20) + n)) -#define PROM_DISPLAY_ADDR PROM_JUMP_TABLE_ENTRY(0) -#define PROM_PUTC_ADDR PROM_JUMP_TABLE_ENTRY(1) -#define PROM_MONITOR_ADDR PROM_JUMP_TABLE_ENTRY(2) - -static void null_prom_display(const char *string, int pos, int clear) -{ -} - -static void null_prom_monitor(void) -{ -} - -static void null_prom_putc(char c) -{ -} - -/* these are functions provided by the bootloader */ -static void (*__prom_putc)(char c) = null_prom_putc; - -void prom_putchar(char c) -{ - __prom_putc(c); -} - -void (*prom_display)(const char *string, int pos, int clear) = - null_prom_display; -void (*prom_monitor)(void) = null_prom_monitor; - -unsigned int lasat_ndelay_divider; - -static void setup_prom_vectors(void) -{ - u32 version = *(u32 *)(RESET_VECTOR + 0x90); - - if (version >= 307) { - prom_display = (void *)PROM_DISPLAY_ADDR; - __prom_putc = (void *)PROM_PUTC_ADDR; - prom_monitor = (void *)PROM_MONITOR_ADDR; - } - printk(KERN_DEBUG "prom vectors set up\n"); -} - -static struct at93c_defs at93c_defs[N_MACHTYPES] = { - { - .reg = (void *)AT93C_REG_100, - .rdata_reg = (void *)AT93C_RDATA_REG_100, - .rdata_shift = AT93C_RDATA_SHIFT_100, - .wdata_shift = AT93C_WDATA_SHIFT_100, - .cs = AT93C_CS_M_100, - .clk = AT93C_CLK_M_100 - }, { - .reg = (void *)AT93C_REG_200, - .rdata_reg = (void *)AT93C_RDATA_REG_200, - .rdata_shift = AT93C_RDATA_SHIFT_200, - .wdata_shift = AT93C_WDATA_SHIFT_200, - .cs = AT93C_CS_M_200, - .clk = AT93C_CLK_M_200 - }, -}; - -void __init prom_init(void) -{ - int argc = fw_arg0; - char **argv = (char **) fw_arg1; - - setup_prom_vectors(); - - if (IS_LASAT_200()) { - printk(KERN_INFO "LASAT 200 board\n"); - lasat_ndelay_divider = LASAT_200_DIVIDER; - at93c = &at93c_defs[1]; - } else { - printk(KERN_INFO "LASAT 100 board\n"); - lasat_ndelay_divider = LASAT_100_DIVIDER; - at93c = &at93c_defs[0]; - } - - lasat_init_board_info(); /* Read info from EEPROM */ - - /* Get the command line */ - if (argc > 0) { - strncpy(arcs_cmdline, argv[0], COMMAND_LINE_SIZE-1); - arcs_cmdline[COMMAND_LINE_SIZE-1] = '\0'; - } - - /* Set the I/O base address */ - set_io_port_base(KSEG1); - - /* Set memory regions */ - ioport_resource.start = 0; - ioport_resource.end = 0xffffffff; /* Wrong, fixme. 
*/ - - add_memory_region(0, lasat_board_info.li_memsize, BOOT_MEM_RAM); -} - -void __init prom_free_prom_memory(void) -{ -} - -const char *get_system_type(void) -{ - return lasat_board_info.li_bmstr; -} diff --git a/arch/mips/lasat/prom.h b/arch/mips/lasat/prom.h deleted file mode 100644 index 3d1df853e9d3..000000000000 --- a/arch/mips/lasat/prom.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PROM_H -#define __PROM_H - -extern void (*prom_display)(const char *string, int pos, int clear); -extern void (*prom_monitor)(void); - -#endif /* __PROM_H */ diff --git a/arch/mips/lasat/reset.c b/arch/mips/lasat/reset.c deleted file mode 100644 index 7c516ed9af15..000000000000 --- a/arch/mips/lasat/reset.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Thomas Horsten <thh@lasat.com> - * Copyright (C) 2000 LASAT Networks A/S. - * - * Reset the LASAT board. - */ -#include <linux/kernel.h> -#include <linux/pm.h> - -#include <asm/reboot.h> -#include <asm/lasat/lasat.h> - -#include "picvue.h" -#include "prom.h" - -static void lasat_machine_restart(char *command); -static void lasat_machine_halt(void); - -/* Used to set machine to boot in service mode via /proc interface */ -int lasat_boot_to_service; - -static void lasat_machine_restart(char *command) -{ - local_irq_disable(); - - if (lasat_boot_to_service) { - *(volatile unsigned int *)0xa0000024 = 0xdeadbeef; - *(volatile unsigned int *)0xa00000fc = 0xfedeabba; - } - *lasat_misc->reset_reg = 0xbedead; - for (;;) ; -} - -static void lasat_machine_halt(void) -{ - local_irq_disable(); - - prom_monitor(); - for (;;) ; -} - -void lasat_reboot_setup(void) -{ - _machine_restart = lasat_machine_restart; - _machine_halt = lasat_machine_halt; - pm_power_off = lasat_machine_halt; -} diff --git a/arch/mips/lasat/serial.c b/arch/mips/lasat/serial.c deleted file mode 100644 index 16b242713420..000000000000 --- a/arch/mips/lasat/serial.c +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Registration of Lasat UART platform device. 
- * - * Copyright (C) 2007 Brian Murphy <brian@murphy.dk> - */ -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/platform_device.h> -#include <linux/serial_8250.h> - -#include <asm/lasat/lasat.h> -#include <asm/lasat/serial.h> - -static struct resource lasat_serial_res[2] __initdata; - -static struct plat_serial8250_port lasat_serial8250_port[] = { - { - .iotype = UPIO_MEM, - .flags = UPF_IOREMAP | UPF_BOOT_AUTOCONF | - UPF_SKIP_TEST, - }, - {}, -}; - -static __init int lasat_uart_add(void) -{ - struct platform_device *pdev; - int retval; - - pdev = platform_device_alloc("serial8250", -1); - if (!pdev) - return -ENOMEM; - - if (!IS_LASAT_200()) { - lasat_serial_res[0].start = KSEG1ADDR(LASAT_UART_REGS_BASE_100); - lasat_serial_res[0].end = lasat_serial_res[0].start + LASAT_UART_REGS_SHIFT_100 * 8 - 1; - lasat_serial_res[0].flags = IORESOURCE_MEM; - lasat_serial_res[1].start = LASATINT_UART_100; - lasat_serial_res[1].end = LASATINT_UART_100; - lasat_serial_res[1].flags = IORESOURCE_IRQ; - - lasat_serial8250_port[0].mapbase = LASAT_UART_REGS_BASE_100; - lasat_serial8250_port[0].uartclk = LASAT_BASE_BAUD_100 * 16; - lasat_serial8250_port[0].regshift = LASAT_UART_REGS_SHIFT_100; - lasat_serial8250_port[0].irq = LASATINT_UART_100; - } else { - lasat_serial_res[0].start = KSEG1ADDR(LASAT_UART_REGS_BASE_200); - lasat_serial_res[0].end = lasat_serial_res[0].start + LASAT_UART_REGS_SHIFT_200 * 8 - 1; - lasat_serial_res[0].flags = IORESOURCE_MEM; - lasat_serial_res[1].start = LASATINT_UART_200; - lasat_serial_res[1].end = LASATINT_UART_200; - lasat_serial_res[1].flags = IORESOURCE_IRQ; - - lasat_serial8250_port[0].mapbase = LASAT_UART_REGS_BASE_200; - lasat_serial8250_port[0].uartclk = LASAT_BASE_BAUD_200 * 16; - lasat_serial8250_port[0].regshift = LASAT_UART_REGS_SHIFT_200; - lasat_serial8250_port[0].irq = LASATINT_UART_200; - } - - pdev->id = PLAT8250_DEV_PLATFORM; - pdev->dev.platform_data = lasat_serial8250_port; - - retval = platform_device_add_resources(pdev, lasat_serial_res, ARRAY_SIZE(lasat_serial_res)); - if (retval) - goto err_free_device; - - retval = platform_device_add(pdev); - if (retval) - goto err_free_device; - - return 0; - -err_free_device: - platform_device_put(pdev); - - return retval; -} -device_initcall(lasat_uart_add); diff --git a/arch/mips/lasat/setup.c b/arch/mips/lasat/setup.c deleted file mode 100644 index 0743243fd86d..000000000000 --- a/arch/mips/lasat/setup.c +++ /dev/null @@ -1,141 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999 MIPS Technologies, Inc. All rights reserved. - * - * Thomas Horsten <thh@lasat.com> - * Copyright (C) 2000 LASAT Networks A/S. - * - * Brian Murphy <brian@murphy.dk> - * - * Lasat specific setup. 
- */ -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/tty.h> - -#include <asm/time.h> -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/irq.h> -#include <asm/lasat/lasat.h> -#include <asm/lasat/serial.h> - -#ifdef CONFIG_PICVUE -#include <linux/notifier.h> -#endif - -#include "ds1603.h" -#include <asm/lasat/ds1603.h> -#include <asm/lasat/picvue.h> -#include <asm/lasat/eeprom.h> - -#include "prom.h" - -int lasat_command_line; -void lasatint_init(void); - -extern void lasat_reboot_setup(void); -extern void pcisetup(void); -extern void edhac_init(void *, void *, void *); -extern void addrflt_init(void); - -struct lasat_misc lasat_misc_info[N_MACHTYPES] = { - { - .reset_reg = (void *)KSEG1ADDR(0x1c840000), - .flash_wp_reg = (void *)KSEG1ADDR(0x1c800000), 2 - }, { - .reset_reg = (void *)KSEG1ADDR(0x11080000), - .flash_wp_reg = (void *)KSEG1ADDR(0x11000000), 6 - } -}; - -struct lasat_misc *lasat_misc; - -#ifdef CONFIG_DS1603 -static struct ds_defs ds_defs[N_MACHTYPES] = { - { (void *)DS1603_REG_100, (void *)DS1603_REG_100, - DS1603_RST_100, DS1603_CLK_100, DS1603_DATA_100, - DS1603_DATA_SHIFT_100, 0, 0 }, - { (void *)DS1603_REG_200, (void *)DS1603_DATA_REG_200, - DS1603_RST_200, DS1603_CLK_200, DS1603_DATA_200, - DS1603_DATA_READ_SHIFT_200, 1, 2000 } -}; -#endif - -#ifdef CONFIG_PICVUE -#include "picvue.h" -static struct pvc_defs pvc_defs[N_MACHTYPES] = { - { (void *)PVC_REG_100, PVC_DATA_SHIFT_100, PVC_DATA_M_100, - PVC_E_100, PVC_RW_100, PVC_RS_100 }, - { (void *)PVC_REG_200, PVC_DATA_SHIFT_200, PVC_DATA_M_200, - PVC_E_200, PVC_RW_200, PVC_RS_200 } -}; -#endif - -static int lasat_panic_display(struct notifier_block *this, - unsigned long event, void *ptr) -{ -#ifdef CONFIG_PICVUE - unsigned char *string = ptr; - if (string == NULL) - string = "Kernel Panic"; - pvc_dump_string(string); -#endif - return NOTIFY_DONE; -} - -static int lasat_panic_prom_monitor(struct notifier_block *this, - unsigned long event, void *ptr) -{ - prom_monitor(); - return NOTIFY_DONE; -} - -static struct notifier_block lasat_panic_block[] = -{ - { - .notifier_call = lasat_panic_display, - .priority = INT_MAX - }, { - .notifier_call = lasat_panic_prom_monitor, - .priority = INT_MIN - } -}; - -void __init plat_time_init(void) -{ - mips_hpt_frequency = lasat_board_info.li_cpu_hz / 2; - - change_c0_status(ST0_IM, IE_IRQ0); -} - -void __init plat_mem_setup(void) -{ - int i; - int lasat_type = IS_LASAT_200() ? 1 : 0; - - lasat_misc = &lasat_misc_info[lasat_type]; -#ifdef CONFIG_PICVUE - picvue = &pvc_defs[lasat_type]; -#endif - - /* Set up panic notifier */ - for (i = 0; i < ARRAY_SIZE(lasat_panic_block); i++) - atomic_notifier_chain_register(&panic_notifier_list, - &lasat_panic_block[i]); - - lasat_reboot_setup(); - -#ifdef CONFIG_DS1603 - ds1603 = &ds_defs[lasat_type]; -#endif - -#ifdef DYNAMIC_SERIAL_INIT - serial_init(); -#endif - - pr_info("Lasat specific initialization complete\n"); -} diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c deleted file mode 100644 index e666fe26c50d..000000000000 --- a/arch/mips/lasat/sysctl.c +++ /dev/null @@ -1,268 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Thomas Horsten <thh@lasat.com> - * Copyright (C) 2000 LASAT Networks A/S. 
- * - * Routines specific to the LASAT boards - */ -#include <linux/types.h> -#include <asm/lasat/lasat.h> - -#include <linux/sysctl.h> -#include <linux/stddef.h> -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/ctype.h> -#include <linux/string.h> -#include <linux/net.h> -#include <linux/inet.h> -#include <linux/uaccess.h> - -#include <asm/time.h> - -#ifdef CONFIG_DS1603 -#include "ds1603.h" -#endif - - -/* And the same for proc */ -int proc_dolasatstring(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int r; - - r = proc_dostring(table, write, buffer, lenp, ppos); - if ((!write) || r) - return r; - - lasat_write_eeprom_info(); - - return 0; -} - -#ifdef CONFIG_DS1603 -static int rtctmp; - -/* proc function to read/write RealTime Clock */ -int proc_dolasatrtc(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct timespec64 ts; - int r; - - if (!write) { - read_persistent_clock64(&ts); - rtctmp = ts.tv_sec; - /* check for time < 0 and set to 0 */ - if (rtctmp < 0) - rtctmp = 0; - } - r = proc_dointvec(table, write, buffer, lenp, ppos); - if (r) - return r; - - if (write) { - /* - * Due to the RTC hardware limitation, we can not actually - * use the full 64-bit range here. - */ - ts.tv_sec = rtctmp; - ts.tv_nsec = 0; - - update_persistent_clock64(ts); - } - - return 0; -} -#endif - -#ifdef CONFIG_INET -int proc_lasat_ip(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - unsigned int ip; - char *p, c; - int len; - char ipbuf[32]; - - if (!table->data || !table->maxlen || !*lenp || - (*ppos && !write)) { - *lenp = 0; - return 0; - } - - if (write) { - len = 0; - p = buffer; - while (len < *lenp) { - if (get_user(c, p++)) - return -EFAULT; - if (c == 0 || c == '\n') - break; - len++; - } - if (len >= sizeof(ipbuf)-1) - len = sizeof(ipbuf) - 1; - if (copy_from_user(ipbuf, buffer, len)) - return -EFAULT; - ipbuf[len] = 0; - *ppos += *lenp; - /* Now see if we can convert it to a valid IP */ - ip = in_aton(ipbuf); - *(unsigned int *)(table->data) = ip; - lasat_write_eeprom_info(); - } else { - ip = *(unsigned int *)(table->data); - sprintf(ipbuf, "%d.%d.%d.%d", - (ip) & 0xff, - (ip >> 8) & 0xff, - (ip >> 16) & 0xff, - (ip >> 24) & 0xff); - len = strlen(ipbuf); - if (len > *lenp) - len = *lenp; - if (len) - if (copy_to_user(buffer, ipbuf, len)) - return -EFAULT; - if (len < *lenp) { - if (put_user('\n', ((char *) buffer) + len)) - return -EFAULT; - len++; - } - *lenp = len; - *ppos += len; - } - - return 0; -} -#endif - -int proc_lasat_prid(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int r; - - r = proc_dointvec(table, write, buffer, lenp, ppos); - if (r < 0) - return r; - if (write) { - lasat_board_info.li_eeprom_info.prid = - lasat_board_info.li_prid; - lasat_write_eeprom_info(); - lasat_init_board_info(); - } - return 0; -} - -extern int lasat_boot_to_service; - -static struct ctl_table lasat_table[] = { - { - .procname = "cpu-hz", - .data = &lasat_board_info.li_cpu_hz, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "bus-hz", - .data = &lasat_board_info.li_bus_hz, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "bmid", - .data = &lasat_board_info.li_bmid, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "prid", - .data = &lasat_board_info.li_prid, - .maxlen = sizeof(int), - .mode = 0644, - 
.proc_handler = proc_lasat_prid, - }, -#ifdef CONFIG_INET - { - .procname = "ipaddr", - .data = &lasat_board_info.li_eeprom_info.ipaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_lasat_ip, - }, - { - .procname = "netmask", - .data = &lasat_board_info.li_eeprom_info.netmask, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_lasat_ip, - }, -#endif - { - .procname = "passwd_hash", - .data = &lasat_board_info.li_eeprom_info.passwd_hash, - .maxlen = - sizeof(lasat_board_info.li_eeprom_info.passwd_hash), - .mode = 0600, - .proc_handler = proc_dolasatstring, - }, - { - .procname = "boot-service", - .data = &lasat_boot_to_service, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#ifdef CONFIG_DS1603 - { - .procname = "rtc", - .data = &rtctmp, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dolasatrtc, - }, -#endif - { - .procname = "namestr", - .data = &lasat_board_info.li_namestr, - .maxlen = sizeof(lasat_board_info.li_namestr), - .mode = 0444, - .proc_handler = proc_dostring, - }, - { - .procname = "typestr", - .data = &lasat_board_info.li_typestr, - .maxlen = sizeof(lasat_board_info.li_typestr), - .mode = 0444, - .proc_handler = proc_dostring, - }, - {} -}; - -static struct ctl_table lasat_root_table[] = { - { - .procname = "lasat", - .mode = 0555, - .child = lasat_table - }, - {} -}; - -static int __init lasat_register_sysctl(void) -{ - struct ctl_table_header *lasat_table_header; - - lasat_table_header = - register_sysctl_table(lasat_root_table); - if (!lasat_table_header) { - printk(KERN_ERR "Unable to register LASAT sysctl\n"); - return -ENOMEM; - } - - return 0; -} - -arch_initcall(lasat_register_sysctl); diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index fda7b57b826e..87fda0713b84 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -279,7 +279,8 @@ EXPORT_SYMBOL(csum_partial) #endif /* odd buffer alignment? */ -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_LOONGSON64) .set push .set arch=mips32r2 wsbh v1, sum @@ -732,7 +733,8 @@ EXPORT_SYMBOL(csum_partial) addu sum, v1 #endif -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_LOONGSON64) .set push .set arch=mips32r2 wsbh v1, sum diff --git a/arch/mips/loongson2ef/Kconfig b/arch/mips/loongson2ef/Kconfig index 595dd48e1e4d..96dc6eba4310 100644 --- a/arch/mips/loongson2ef/Kconfig +++ b/arch/mips/loongson2ef/Kconfig @@ -46,7 +46,6 @@ config LEMOTE_MACH2F select CSRC_R4K if ! 
MIPS_EXTERNAL_TIMER select DMA_NONCOHERENT select GENERIC_ISA_DMA_SUPPORT_BROKEN - select HAVE_CLK select FORCE_PCI select I8259 select IRQ_MIPS_CPU diff --git a/arch/mips/loongson2ef/Platform b/arch/mips/loongson2ef/Platform index 3aca42963f35..cdad3c1a9a18 100644 --- a/arch/mips/loongson2ef/Platform +++ b/arch/mips/loongson2ef/Platform @@ -26,7 +26,6 @@ endif # Loongson Machines' Support # -platform-$(CONFIG_MACH_LOONGSON2EF) += loongson2ef/ cflags-$(CONFIG_MACH_LOONGSON2EF) += -I$(srctree)/arch/mips/include/asm/mach-loongson2ef -mno-branch-likely load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000 load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000 diff --git a/arch/mips/loongson2ef/common/init.c b/arch/mips/loongson2ef/common/init.c index 45512178be77..ce3f02f75e2a 100644 --- a/arch/mips/loongson2ef/common/init.c +++ b/arch/mips/loongson2ef/common/init.c @@ -19,10 +19,10 @@ unsigned long __maybe_unused _loongson_addrwincfg_base; static void __init mips_nmi_setup(void) { void *base; - extern char except_vec_nmi; + extern char except_vec_nmi[]; base = (void *)(CAC_BASE + 0x380); - memcpy(base, &except_vec_nmi, 0x80); + memcpy(base, except_vec_nmi, 0x80); flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } diff --git a/arch/mips/loongson2ef/lemote-2f/clock.c b/arch/mips/loongson2ef/lemote-2f/clock.c index 414f282c8ab5..850b6b9f8f15 100644 --- a/arch/mips/loongson2ef/lemote-2f/clock.c +++ b/arch/mips/loongson2ef/lemote-2f/clock.c @@ -6,22 +6,12 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. */ -#include <linux/clk.h> #include <linux/cpufreq.h> #include <linux/errno.h> #include <linux/export.h> -#include <linux/list.h> -#include <linux/mutex.h> -#include <linux/spinlock.h> -#include <asm/clock.h> #include <asm/mach-loongson2ef/loongson.h> -static LIST_HEAD(clock_list); -static DEFINE_SPINLOCK(clock_lock); -static DEFINE_MUTEX(clock_list_sem); - -/* Minimum CLK support */ enum { DC_ZERO, DC_25PT = 2, DC_37PT, DC_50PT, DC_62PT, DC_75PT, DC_87PT, DC_DISABLE, DC_RESV @@ -41,103 +31,21 @@ struct cpufreq_frequency_table loongson2_clockmod_table[] = { }; EXPORT_SYMBOL_GPL(loongson2_clockmod_table); -static struct clk cpu_clk = { - .name = "cpu_clk", - .flags = CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES, - .rate = 800000000, -}; - -struct clk *clk_get(struct device *dev, const char *id) -{ - return &cpu_clk; -} -EXPORT_SYMBOL(clk_get); - -static void propagate_rate(struct clk *clk) -{ - struct clk *clkp; - - list_for_each_entry(clkp, &clock_list, node) { - if (likely(clkp->parent != clk)) - continue; - if (likely(clkp->ops && clkp->ops->recalc)) - clkp->ops->recalc(clkp); - if (unlikely(clkp->flags & CLK_RATE_PROPAGATES)) - propagate_rate(clkp); - } -} - -int clk_enable(struct clk *clk) -{ - return 0; -} -EXPORT_SYMBOL(clk_enable); - -void clk_disable(struct clk *clk) +int loongson2_cpu_set_rate(unsigned long rate_khz) { -} -EXPORT_SYMBOL(clk_disable); - -unsigned long clk_get_rate(struct clk *clk) -{ - if (!clk) - return 0; - - return (unsigned long)clk->rate; -} -EXPORT_SYMBOL(clk_get_rate); - -void clk_put(struct clk *clk) -{ -} -EXPORT_SYMBOL(clk_put); - -int clk_set_rate(struct clk *clk, unsigned long rate) -{ - unsigned int rate_khz = rate / 1000; struct cpufreq_frequency_table *pos; - int ret = 0; int regval; - if (likely(clk->ops && clk->ops->set_rate)) { - unsigned long flags; - - spin_lock_irqsave(&clock_lock, flags); - ret = clk->ops->set_rate(clk, rate, 0); - spin_unlock_irqrestore(&clock_lock, flags); - } - - if 
(unlikely(clk->flags & CLK_RATE_PROPAGATES)) - propagate_rate(clk); - cpufreq_for_each_valid_entry(pos, loongson2_clockmod_table) if (rate_khz == pos->frequency) break; if (rate_khz != pos->frequency) return -ENOTSUPP; - clk->rate = rate; - regval = readl(LOONGSON_CHIPCFG); regval = (regval & ~0x7) | (pos->driver_data - 1); writel(regval, LOONGSON_CHIPCFG); - return ret; -} -EXPORT_SYMBOL_GPL(clk_set_rate); - -long clk_round_rate(struct clk *clk, unsigned long rate) -{ - if (likely(clk->ops && clk->ops->round_rate)) { - unsigned long flags, rounded; - - spin_lock_irqsave(&clock_lock, flags); - rounded = clk->ops->round_rate(clk, rate); - spin_unlock_irqrestore(&clock_lock, flags); - - return rounded; - } - - return rate; + return 0; } -EXPORT_SYMBOL_GPL(clk_round_rate); +EXPORT_SYMBOL_GPL(loongson2_cpu_set_rate); diff --git a/arch/mips/loongson32/Platform b/arch/mips/loongson32/Platform index 7f8e342f1ef5..3b9673e7a2fa 100644 --- a/arch/mips/loongson32/Platform +++ b/arch/mips/loongson32/Platform @@ -1,4 +1,3 @@ cflags-$(CONFIG_CPU_LOONGSON32) += -march=mips32r2 -Wa,--trap -platform-$(CONFIG_MACH_LOONGSON32) += loongson32/ cflags-$(CONFIG_MACH_LOONGSON32) += -I$(srctree)/arch/mips/include/asm/mach-loongson32 load-$(CONFIG_CPU_LOONGSON32) += 0xffffffff80200000 diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index 48b29c198acf..517f1f8e81fb 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -4,18 +4,12 @@ if MACH_LOONGSON64 config RS780_HPET bool "RS780/SBX00 HPET Timer" depends on MACH_LOONGSON64 + depends on BROKEN select MIPS_EXTERNAL_TIMER help This option enables the hpet timer of AMD RS780/SBX00. - If you want to enable the Loongson3 CPUFreq Driver, Please enable - this option at first, otherwise, You will get wrong system time. - - If unsure, say Yes. - - -config LOONGSON_MC146818 - bool - default n + Note: This driver is doing some dangerous hack. Please only enable + it on RS780E systems. 
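
Editor's note: the lemote-2f clock.c rewrite above collapses the ad-hoc clk framework into a single loongson2_cpu_set_rate() helper that looks the requested rate up in loongson2_clockmod_table and folds the matching selector into the low bits of LOONGSON_CHIPCFG. A minimal standalone sketch of that same lookup-and-merge pattern follows; the table values, register word, and the set_rate()/freq_entry names are illustrative stand-ins, not the real hardware or kernel definitions.

/* Sketch of the frequency-table lookup used by loongson2_cpu_set_rate().
 * Only the lookup/merge logic mirrors the patch; values are made up.
 */
#include <stdio.h>

struct freq_entry {
        unsigned int driver_data;   /* divider selector, 1-based */
        unsigned int frequency;     /* kHz; 0 terminates the table */
};

static const struct freq_entry clockmod_table[] = {
        { 2, 200000 },
        { 4, 400000 },
        { 8, 800000 },
        { 0, 0 },                   /* sentinel */
};

static int set_rate(unsigned int rate_khz, unsigned int *chipcfg)
{
        const struct freq_entry *pos;

        for (pos = clockmod_table; pos->frequency; pos++)
                if (pos->frequency == rate_khz)
                        break;
        if (pos->frequency != rate_khz)
                return -1;          /* requested rate not in the table */

        /* keep everything but the low 3 bits, insert the selector */
        *chipcfg = (*chipcfg & ~0x7u) | (pos->driver_data - 1);
        return 0;
}

int main(void)
{
        unsigned int chipcfg = 0xf0;

        if (set_rate(400000, &chipcfg) == 0)
                printf("control word now 0x%x\n", chipcfg);
        return 0;
}

As in the patch, an unknown rate is rejected rather than rounded, and only the divider field of the control word is rewritten.
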
endif # MACH_LOONGSON64 diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile index b7f40b179c71..39c06f52b08f 100644 --- a/arch/mips/loongson64/Makefile +++ b/arch/mips/loongson64/Makefile @@ -2,12 +2,12 @@ # # Makefile for Loongson-3 family machines # -obj-$(CONFIG_MACH_LOONGSON64) += cop2-ex.o platform.o acpi_init.o dma.o \ +obj-$(CONFIG_MACH_LOONGSON64) += cop2-ex.o platform.o dma.o \ setup.o init.o env.o time.o reset.o \ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_RS780_HPET) += hpet.o -obj-$(CONFIG_PCI) += pci.o -obj-$(CONFIG_LOONGSON_MC146818) += rtc.o obj-$(CONFIG_SUSPEND) += pm.o +obj-$(CONFIG_PCI_QUIRKS) += vbios_quirk.o +obj-$(CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION) += cpucfg-emul.o diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index d5eb94c9edb4..ec42c5085905 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -55,6 +55,5 @@ cflags-y += $(call cc-option,-mno-loongson-mmi) # Loongson Machines' Support # -platform-$(CONFIG_MACH_LOONGSON64) += loongson64/ cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 -mno-branch-likely load-$(CONFIG_CPU_LOONGSON64) += 0xffffffff80200000 diff --git a/arch/mips/loongson64/acpi_init.c b/arch/mips/loongson64/acpi_init.c deleted file mode 100644 index 8d7c119ddf91..000000000000 --- a/arch/mips/loongson64/acpi_init.c +++ /dev/null @@ -1,151 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/io.h> -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/export.h> - -#define SBX00_ACPI_IO_BASE 0x800 -#define SBX00_ACPI_IO_SIZE 0x100 - -#define ACPI_PM_EVT_BLK (SBX00_ACPI_IO_BASE + 0x00) /* 4 bytes */ -#define ACPI_PM_CNT_BLK (SBX00_ACPI_IO_BASE + 0x04) /* 2 bytes */ -#define ACPI_PMA_CNT_BLK (SBX00_ACPI_IO_BASE + 0x0F) /* 1 byte */ -#define ACPI_PM_TMR_BLK (SBX00_ACPI_IO_BASE + 0x18) /* 4 bytes */ -#define ACPI_GPE0_BLK (SBX00_ACPI_IO_BASE + 0x10) /* 8 bytes */ -#define ACPI_END (SBX00_ACPI_IO_BASE + 0x80) - -#define PM_INDEX 0xCD6 -#define PM_DATA 0xCD7 -#define PM2_INDEX 0xCD0 -#define PM2_DATA 0xCD1 - -/* - * SCI interrupt need acpi space, allocate here - */ - -static int __init register_acpi_resource(void) -{ - request_region(SBX00_ACPI_IO_BASE, SBX00_ACPI_IO_SIZE, "acpi"); - return 0; -} - -static void pmio_write_index(u16 index, u8 reg, u8 value) -{ - outb(reg, index); - outb(value, index + 1); -} - -static u8 pmio_read_index(u16 index, u8 reg) -{ - outb(reg, index); - return inb(index + 1); -} - -void pm_iowrite(u8 reg, u8 value) -{ - pmio_write_index(PM_INDEX, reg, value); -} -EXPORT_SYMBOL(pm_iowrite); - -u8 pm_ioread(u8 reg) -{ - return pmio_read_index(PM_INDEX, reg); -} -EXPORT_SYMBOL(pm_ioread); - -void pm2_iowrite(u8 reg, u8 value) -{ - pmio_write_index(PM2_INDEX, reg, value); -} -EXPORT_SYMBOL(pm2_iowrite); - -u8 pm2_ioread(u8 reg) -{ - return pmio_read_index(PM2_INDEX, reg); -} -EXPORT_SYMBOL(pm2_ioread); - -static void acpi_hw_clear_status(void) -{ - u16 value; - - /* PMStatus: Clear WakeStatus/PwrBtnStatus */ - value = inw(ACPI_PM_EVT_BLK); - value |= (1 << 8 | 1 << 15); - outw(value, ACPI_PM_EVT_BLK); - - /* GPEStatus: Clear all generated events */ - outl(inl(ACPI_GPE0_BLK), ACPI_GPE0_BLK); -} - -void acpi_registers_setup(void) -{ - u32 value; - - /* PM Status Base */ - pm_iowrite(0x20, ACPI_PM_EVT_BLK & 0xff); - pm_iowrite(0x21, ACPI_PM_EVT_BLK >> 8); - - /* PM Control Base */ - pm_iowrite(0x22, ACPI_PM_CNT_BLK & 0xff); - pm_iowrite(0x23, ACPI_PM_CNT_BLK >> 8); - - /* GPM 
Base */ - pm_iowrite(0x28, ACPI_GPE0_BLK & 0xff); - pm_iowrite(0x29, ACPI_GPE0_BLK >> 8); - - /* ACPI End */ - pm_iowrite(0x2e, ACPI_END & 0xff); - pm_iowrite(0x2f, ACPI_END >> 8); - - /* IO Decode: When AcpiDecodeEnable set, South-Bridge uses the contents - * of the PM registers at index 0x20~0x2B to decode ACPI I/O address. */ - pm_iowrite(0x0e, 1 << 3); - - /* SCI_EN set */ - outw(1, ACPI_PM_CNT_BLK); - - /* Enable to generate SCI */ - pm_iowrite(0x10, pm_ioread(0x10) | 1); - - /* GPM3/GPM9 enable */ - value = inl(ACPI_GPE0_BLK + 4); - outl(value | (1 << 14) | (1 << 22), ACPI_GPE0_BLK + 4); - - /* Set GPM9 as input */ - pm_iowrite(0x8d, pm_ioread(0x8d) & (~(1 << 1))); - - /* Set GPM9 as non-output */ - pm_iowrite(0x94, pm_ioread(0x94) | (1 << 3)); - - /* GPM3 config ACPI trigger SCIOUT */ - pm_iowrite(0x33, pm_ioread(0x33) & (~(3 << 4))); - - /* GPM9 config ACPI trigger SCIOUT */ - pm_iowrite(0x3d, pm_ioread(0x3d) & (~(3 << 2))); - - /* GPM3 config falling edge trigger */ - pm_iowrite(0x37, pm_ioread(0x37) & (~(1 << 6))); - - /* No wait for STPGNT# in ACPI Sx state */ - pm_iowrite(0x7c, pm_ioread(0x7c) | (1 << 6)); - - /* Set GPM3 pull-down enable */ - value = pm2_ioread(0xf6); - value |= ((1 << 7) | (1 << 3)); - pm2_iowrite(0xf6, value); - - /* Set GPM9 pull-down enable */ - value = pm2_ioread(0xf8); - value |= ((1 << 5) | (1 << 1)); - pm2_iowrite(0xf8, value); -} - -int __init sbx00_acpi_init(void) -{ - register_acpi_resource(); - acpi_registers_setup(); - acpi_hw_clear_status(); - - return 0; -} diff --git a/arch/mips/loongson64/cop2-ex.c b/arch/mips/loongson64/cop2-ex.c index 9efdfe430ff0..f130f62129b8 100644 --- a/arch/mips/loongson64/cop2-ex.c +++ b/arch/mips/loongson64/cop2-ex.c @@ -14,17 +14,30 @@ #include <linux/sched.h> #include <linux/notifier.h> #include <linux/ptrace.h> +#include <linux/uaccess.h> +#include <linux/sched/signal.h> #include <asm/fpu.h> #include <asm/cop2.h> +#include <asm/inst.h> +#include <asm/branch.h> #include <asm/current.h> #include <asm/mipsregs.h> +#include <asm/unaligned-emul.h> static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, void *data) { - int fpu_owned; + unsigned int res, fpu_owned; + unsigned long ra, value, value_next; + union mips_instruction insn; int fr = !test_thread_flag(TIF_32BIT_FPREGS); + struct pt_regs *regs = (struct pt_regs *)data; + void __user *addr = (void __user *)regs->cp0_badvaddr; + unsigned int __user *pc = (unsigned int __user *)exception_epc(regs); + + ra = regs->regs[31]; + __get_user(insn.word, pc); switch (action) { case CU2_EXCEPTION: @@ -49,9 +62,284 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, preempt_enable(); return NOTIFY_STOP; /* Don't call default notifier */ + + case CU2_LWC2_OP: + if (insn.loongson3_lswc2_format.ls == 0) + goto sigbus; + + if (insn.loongson3_lswc2_format.fr == 0) { /* gslq */ + if (!access_ok(addr, 16)) + goto sigbus; + + LoadDW(addr, value, res); + if (res) + goto fault; + + LoadDW(addr + 8, value_next, res); + if (res) + goto fault; + + regs->regs[insn.loongson3_lswc2_format.rt] = value; + regs->regs[insn.loongson3_lswc2_format.rq] = value_next; + compute_return_epc(regs); + } else { /* gslqc1 */ + if (!access_ok(addr, 16)) + goto sigbus; + + lose_fpu(1); + LoadDW(addr, value, res); + if (res) + goto fault; + + LoadDW(addr + 8, value_next, res); + if (res) + goto fault; + + set_fpr64(current->thread.fpu.fpr, + insn.loongson3_lswc2_format.rt, value); + set_fpr64(current->thread.fpu.fpr, + insn.loongson3_lswc2_format.rq, 
value_next); + compute_return_epc(regs); + own_fpu(1); + } + return NOTIFY_STOP; /* Don't call default notifier */ + + case CU2_SWC2_OP: + if (insn.loongson3_lswc2_format.ls == 0) + goto sigbus; + + if (insn.loongson3_lswc2_format.fr == 0) { /* gssq */ + if (!access_ok(addr, 16)) + goto sigbus; + + /* write upper 8 bytes first */ + value_next = regs->regs[insn.loongson3_lswc2_format.rq]; + + StoreDW(addr + 8, value_next, res); + if (res) + goto fault; + value = regs->regs[insn.loongson3_lswc2_format.rt]; + + StoreDW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + } else { /* gssqc1 */ + if (!access_ok(addr, 16)) + goto sigbus; + + lose_fpu(1); + value_next = get_fpr64(current->thread.fpu.fpr, + insn.loongson3_lswc2_format.rq); + + StoreDW(addr + 8, value_next, res); + if (res) + goto fault; + + value = get_fpr64(current->thread.fpu.fpr, + insn.loongson3_lswc2_format.rt); + + StoreDW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + own_fpu(1); + } + return NOTIFY_STOP; /* Don't call default notifier */ + + case CU2_LDC2_OP: + switch (insn.loongson3_lsdc2_format.opcode1) { + /* + * Loongson-3 overridden ldc2 instructions. + * opcode1 instruction + * 0x1 gslhx: load 2 bytes to GPR + * 0x2 gslwx: load 4 bytes to GPR + * 0x3 gsldx: load 8 bytes to GPR + * 0x6 gslwxc1: load 4 bytes to FPR + * 0x7 gsldxc1: load 8 bytes to FPR + */ + case 0x1: + if (!access_ok(addr, 2)) + goto sigbus; + + LoadHW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + regs->regs[insn.loongson3_lsdc2_format.rt] = value; + break; + case 0x2: + if (!access_ok(addr, 4)) + goto sigbus; + + LoadW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + regs->regs[insn.loongson3_lsdc2_format.rt] = value; + break; + case 0x3: + if (!access_ok(addr, 8)) + goto sigbus; + + LoadDW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + regs->regs[insn.loongson3_lsdc2_format.rt] = value; + break; + case 0x6: + die_if_kernel("Unaligned FP access in kernel code", regs); + BUG_ON(!used_math()); + if (!access_ok(addr, 4)) + goto sigbus; + + lose_fpu(1); + LoadW(addr, value, res); + if (res) + goto fault; + + set_fpr64(current->thread.fpu.fpr, + insn.loongson3_lsdc2_format.rt, value); + compute_return_epc(regs); + own_fpu(1); + + break; + case 0x7: + die_if_kernel("Unaligned FP access in kernel code", regs); + BUG_ON(!used_math()); + if (!access_ok(addr, 8)) + goto sigbus; + + lose_fpu(1); + LoadDW(addr, value, res); + if (res) + goto fault; + + set_fpr64(current->thread.fpu.fpr, + insn.loongson3_lsdc2_format.rt, value); + compute_return_epc(regs); + own_fpu(1); + break; + + } + return NOTIFY_STOP; /* Don't call default notifier */ + + case CU2_SDC2_OP: + switch (insn.loongson3_lsdc2_format.opcode1) { + /* + * Loongson-3 overridden sdc2 instructions. 
+ * opcode1 instruction + * 0x1 gsshx: store 2 bytes from GPR + * 0x2 gsswx: store 4 bytes from GPR + * 0x3 gssdx: store 8 bytes from GPR + * 0x6 gsswxc1: store 4 bytes from FPR + * 0x7 gssdxc1: store 8 bytes from FPR + */ + case 0x1: + if (!access_ok(addr, 2)) + goto sigbus; + + compute_return_epc(regs); + value = regs->regs[insn.loongson3_lsdc2_format.rt]; + + StoreHW(addr, value, res); + if (res) + goto fault; + + break; + case 0x2: + if (!access_ok(addr, 4)) + goto sigbus; + + compute_return_epc(regs); + value = regs->regs[insn.loongson3_lsdc2_format.rt]; + + StoreW(addr, value, res); + if (res) + goto fault; + + break; + case 0x3: + if (!access_ok(addr, 8)) + goto sigbus; + + compute_return_epc(regs); + value = regs->regs[insn.loongson3_lsdc2_format.rt]; + + StoreDW(addr, value, res); + if (res) + goto fault; + + break; + + case 0x6: + die_if_kernel("Unaligned FP access in kernel code", regs); + BUG_ON(!used_math()); + + if (!access_ok(addr, 4)) + goto sigbus; + + lose_fpu(1); + value = get_fpr64(current->thread.fpu.fpr, + insn.loongson3_lsdc2_format.rt); + + StoreW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + own_fpu(1); + + break; + case 0x7: + die_if_kernel("Unaligned FP access in kernel code", regs); + BUG_ON(!used_math()); + + if (!access_ok(addr, 8)) + goto sigbus; + + lose_fpu(1); + value = get_fpr64(current->thread.fpu.fpr, + insn.loongson3_lsdc2_format.rt); + + StoreDW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + own_fpu(1); + + break; + } + return NOTIFY_STOP; /* Don't call default notifier */ } return NOTIFY_OK; /* Let default notifier send signals */ + +fault: + /* roll back jump/branch */ + regs->regs[31] = ra; + regs->cp0_epc = (unsigned long)pc; + /* Did we have an exception handler installed? */ + if (fixup_exception(regs)) + return NOTIFY_STOP; /* Don't call default notifier */ + + die_if_kernel("Unhandled kernel unaligned access", regs); + force_sig(SIGSEGV); + + return NOTIFY_STOP; /* Don't call default notifier */ + +sigbus: + die_if_kernel("Unhandled kernel unaligned access", regs); + force_sig(SIGBUS); + + return NOTIFY_STOP; /* Don't call default notifier */ } static int __init loongson_cu2_setup(void) diff --git a/arch/mips/loongson64/cpucfg-emul.c b/arch/mips/loongson64/cpucfg-emul.c new file mode 100644 index 000000000000..cd619b47ba1f --- /dev/null +++ b/arch/mips/loongson64/cpucfg-emul.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/smp.h> +#include <linux/types.h> +#include <asm/cpu.h> +#include <asm/cpu-info.h> +#include <asm/elf.h> + +#include <loongson_regs.h> +#include <cpucfg-emul.h> + +static bool is_loongson(struct cpuinfo_mips *c) +{ + switch (c->processor_id & PRID_COMP_MASK) { + case PRID_COMP_LEGACY: + return ((c->processor_id & PRID_IMP_MASK) == + PRID_IMP_LOONGSON_64C); + + case PRID_COMP_LOONGSON: + return true; + + default: + return false; + } +} + +static u32 get_loongson_fprev(struct cpuinfo_mips *c) +{ + return c->fpu_id & LOONGSON_FPREV_MASK; +} + +static bool cpu_has_uca(void) +{ + u32 diag = read_c0_diag(); + u32 new_diag; + + if (diag & LOONGSON_DIAG_UCAC) + /* UCA is already enabled. */ + return true; + + /* See if UCAC bit can be flipped on. This should be safe. 
*/ + new_diag = diag | LOONGSON_DIAG_UCAC; + write_c0_diag(new_diag); + new_diag = read_c0_diag(); + write_c0_diag(diag); + + return (new_diag & LOONGSON_DIAG_UCAC) != 0; +} + +static void probe_uca(struct cpuinfo_mips *c) +{ + if (cpu_has_uca()) + c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_LSUCA; +} + +static void decode_loongson_config6(struct cpuinfo_mips *c) +{ + u32 config6 = read_c0_config6(); + + if (config6 & MIPS_CONF6_LOONGSON_SFBEN) + c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_SFBP; + if (config6 & MIPS_CONF6_LOONGSON_LLEXC) + c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_LLEXC; + if (config6 & MIPS_CONF6_LOONGSON_SCRAND) + c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_SCRAND; +} + +static void patch_cpucfg_sel1(struct cpuinfo_mips *c) +{ + u64 ases = c->ases; + u64 options = c->options; + u32 data = c->loongson3_cpucfg_data[0]; + + if (options & MIPS_CPU_FPU) { + data |= LOONGSON_CFG1_FP; + data |= get_loongson_fprev(c) << LOONGSON_CFG1_FPREV_OFFSET; + } + if (ases & MIPS_ASE_LOONGSON_MMI) + data |= LOONGSON_CFG1_MMI; + if (ases & MIPS_ASE_MSA) + data |= LOONGSON_CFG1_MSA1; + + c->loongson3_cpucfg_data[0] = data; +} + +static void patch_cpucfg_sel2(struct cpuinfo_mips *c) +{ + u64 ases = c->ases; + u64 options = c->options; + u32 data = c->loongson3_cpucfg_data[1]; + + if (ases & MIPS_ASE_LOONGSON_EXT) + data |= LOONGSON_CFG2_LEXT1; + if (ases & MIPS_ASE_LOONGSON_EXT2) + data |= LOONGSON_CFG2_LEXT2; + if (options & MIPS_CPU_LDPTE) + data |= LOONGSON_CFG2_LSPW; + + if (ases & MIPS_ASE_VZ) + data |= LOONGSON_CFG2_LVZP; + else + data &= ~LOONGSON_CFG2_LVZREV; + + c->loongson3_cpucfg_data[1] = data; +} + +static void patch_cpucfg_sel3(struct cpuinfo_mips *c) +{ + u64 ases = c->ases; + u32 data = c->loongson3_cpucfg_data[2]; + + if (ases & MIPS_ASE_LOONGSON_CAM) { + data |= LOONGSON_CFG3_LCAMP; + } else { + data &= ~LOONGSON_CFG3_LCAMREV; + data &= ~LOONGSON_CFG3_LCAMNUM; + data &= ~LOONGSON_CFG3_LCAMKW; + data &= ~LOONGSON_CFG3_LCAMVW; + } + + c->loongson3_cpucfg_data[2] = data; +} + +void loongson3_cpucfg_synthesize_data(struct cpuinfo_mips *c) +{ + /* Only engage the logic on Loongson processors. */ + if (!is_loongson(c)) + return; + + /* CPUs with CPUCFG support don't need to synthesize anything. */ + if (cpu_has_cfg()) + goto have_cpucfg_now; + + c->loongson3_cpucfg_data[0] = 0; + c->loongson3_cpucfg_data[1] = 0; + c->loongson3_cpucfg_data[2] = 0; + + /* Add CPUCFG features non-discoverable otherwise. 
*/ + switch (c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) { + case PRID_IMP_LOONGSON_64R | PRID_REV_LOONGSON2K_R1_0: + case PRID_IMP_LOONGSON_64R | PRID_REV_LOONGSON2K_R1_1: + case PRID_IMP_LOONGSON_64R | PRID_REV_LOONGSON2K_R1_2: + case PRID_IMP_LOONGSON_64R | PRID_REV_LOONGSON2K_R1_3: + decode_loongson_config6(c); + probe_uca(c); + + c->loongson3_cpucfg_data[0] |= (LOONGSON_CFG1_LSLDR0 | + LOONGSON_CFG1_LSSYNCI | LOONGSON_CFG1_LLSYNC | + LOONGSON_CFG1_TGTSYNC); + c->loongson3_cpucfg_data[1] |= (LOONGSON_CFG2_LBT1 | + LOONGSON_CFG2_LBT2 | LOONGSON_CFG2_LPMP | + LOONGSON_CFG2_LPM_REV2); + c->loongson3_cpucfg_data[2] = 0; + break; + + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R1: + c->loongson3_cpucfg_data[0] |= (LOONGSON_CFG1_LSLDR0 | + LOONGSON_CFG1_LSSYNCI | LOONGSON_CFG1_LSUCA | + LOONGSON_CFG1_LLSYNC | LOONGSON_CFG1_TGTSYNC); + c->loongson3_cpucfg_data[1] |= (LOONGSON_CFG2_LBT1 | + LOONGSON_CFG2_LPMP | LOONGSON_CFG2_LPM_REV1); + c->loongson3_cpucfg_data[2] |= ( + LOONGSON_CFG3_LCAM_REV1 | + LOONGSON_CFG3_LCAMNUM_REV1 | + LOONGSON_CFG3_LCAMKW_REV1 | + LOONGSON_CFG3_LCAMVW_REV1); + break; + + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3B_R1: + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3B_R2: + c->loongson3_cpucfg_data[0] |= (LOONGSON_CFG1_LSLDR0 | + LOONGSON_CFG1_LSSYNCI | LOONGSON_CFG1_LSUCA | + LOONGSON_CFG1_LLSYNC | LOONGSON_CFG1_TGTSYNC); + c->loongson3_cpucfg_data[1] |= (LOONGSON_CFG2_LBT1 | + LOONGSON_CFG2_LPMP | LOONGSON_CFG2_LPM_REV1); + c->loongson3_cpucfg_data[2] |= ( + LOONGSON_CFG3_LCAM_REV1 | + LOONGSON_CFG3_LCAMNUM_REV1 | + LOONGSON_CFG3_LCAMKW_REV1 | + LOONGSON_CFG3_LCAMVW_REV1); + break; + + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0: + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_1: + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R3_0: + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R3_1: + decode_loongson_config6(c); + probe_uca(c); + + c->loongson3_cpucfg_data[0] |= (LOONGSON_CFG1_CNT64 | + LOONGSON_CFG1_LSLDR0 | LOONGSON_CFG1_LSPREF | + LOONGSON_CFG1_LSPREFX | LOONGSON_CFG1_LSSYNCI | + LOONGSON_CFG1_LLSYNC | LOONGSON_CFG1_TGTSYNC); + c->loongson3_cpucfg_data[1] |= (LOONGSON_CFG2_LBT1 | + LOONGSON_CFG2_LBT2 | LOONGSON_CFG2_LBTMMU | + LOONGSON_CFG2_LPMP | LOONGSON_CFG2_LPM_REV1 | + LOONGSON_CFG2_LVZ_REV1); + c->loongson3_cpucfg_data[2] |= (LOONGSON_CFG3_LCAM_REV1 | + LOONGSON_CFG3_LCAMNUM_REV1 | + LOONGSON_CFG3_LCAMKW_REV1 | + LOONGSON_CFG3_LCAMVW_REV1); + break; + + default: + /* It is possible that some future Loongson cores still do + * not have CPUCFG, so do not emulate anything for these + * cores. + */ + return; + } + + /* This feature is set by firmware, but all known Loongson-64 systems + * are configured this way. + */ + c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_CDMAP; + + /* Patch in dynamically probed bits. */ + patch_cpucfg_sel1(c); + patch_cpucfg_sel2(c); + patch_cpucfg_sel3(c); + +have_cpucfg_now: + /* We have usable CPUCFG now, emulated or not. + * Announce CPUCFG availability to userspace via hwcap. 
+ */ + elf_hwcap |= HWCAP_LOONGSON_CPUCFG; +} diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c index 5e86635f71db..dbfe6e82fddd 100644 --- a/arch/mips/loongson64/dma.c +++ b/arch/mips/loongson64/dma.c @@ -2,21 +2,24 @@ #include <linux/dma-direct.h> #include <linux/init.h> #include <linux/swiotlb.h> +#include <boot_param.h> dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from * Loongson-3's 48bit address space and embed it into 40bit */ long nid = (paddr >> 44) & 0x3; - return ((nid << 44) ^ paddr) | (nid << 37); + + return ((nid << 44) ^ paddr) | (nid << node_id_offset); } phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) { /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from * Loongson-3's 48bit address space and embed it into 40bit */ - long nid = (daddr >> 37) & 0x3; - return ((nid << 37) ^ daddr) | (nid << 44); + long nid = (daddr >> node_id_offset) & 0x3; + + return ((nid << node_id_offset) ^ daddr) | (nid << 44); } void __init plat_swiotlb_setup(void) diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c index 2554ef11170d..d11bc346bbca 100644 --- a/arch/mips/loongson64/env.c +++ b/arch/mips/loongson64/env.c @@ -14,12 +14,15 @@ * Author: Wu Zhangjin, wuzhangjin@gmail.com */ #include <linux/export.h> +#include <linux/pci_ids.h> #include <asm/bootinfo.h> #include <loongson.h> #include <boot_param.h> #include <builtin_dtbs.h> #include <workarounds.h> +#define HOST_BRIDGE_CONFIG_ADDR ((void __iomem *)TO_UNCAC(0x1a000000)) + u32 cpu_clock_freq; EXPORT_SYMBOL(cpu_clock_freq); struct efi_memory_map_loongson *loongson_memmap; @@ -43,6 +46,8 @@ void __init prom_init_env(void) struct system_loongson *esys; struct efi_cpuinfo_loongson *ecpu; struct irq_source_routing_table *eirq_source; + u32 id; + u16 vendor, device; /* firmware arguments are initialized in head.S */ boot_p = (struct boot_params *)fw_arg2; @@ -178,4 +183,19 @@ void __init prom_init_env(void) memcpy(loongson_sysconf.sensors, esys->sensors, sizeof(struct sensor_device) * loongson_sysconf.nr_sensors); pr_info("CpuClock = %u\n", cpu_clock_freq); + + /* Read the ID of PCI host bridge to detect bridge type */ + id = readl(HOST_BRIDGE_CONFIG_ADDR); + vendor = id & 0xffff; + device = (id >> 16) & 0xffff; + + if (vendor == PCI_VENDOR_ID_LOONGSON && device == 0x7a00) { + pr_info("The bridge chip is LS7A\n"); + loongson_sysconf.bridgetype = LS7A; + loongson_sysconf.early_config = ls7a_early_config; + } else { + pr_info("The bridge chip is RS780E or SR5690\n"); + loongson_sysconf.bridgetype = RS780E; + loongson_sysconf.early_config = rs780e_early_config; + } } diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index da38944471f4..59ddadace83f 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -5,6 +5,7 @@ */ #include <linux/irqchip.h> +#include <linux/logic_pio.h> #include <linux/memblock.h> #include <asm/bootinfo.h> #include <asm/traps.h> @@ -13,25 +14,41 @@ #include <asm/fw/fw.h> #include <loongson.h> +#include <boot_param.h> + +#define NODE_ID_OFFSET_ADDR ((void __iomem *)TO_UNCAC(0x1001041c)) + +u32 node_id_offset; static void __init mips_nmi_setup(void) { void *base; - extern char except_vec_nmi; + extern char except_vec_nmi[]; base = (void *)(CAC_BASE + 0x380); - memcpy(base, &except_vec_nmi, 0x80); + memcpy(base, except_vec_nmi, 0x80); flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } +void ls7a_early_config(void) +{ + 
node_id_offset = ((readl(NODE_ID_OFFSET_ADDR) >> 8) & 0x1f) + 36; +} + +void rs780e_early_config(void) +{ + node_id_offset = 37; +} + void __init prom_init(void) { fw_init_cmdline(); prom_init_env(); /* init base address of io space */ - set_io_port_base((unsigned long) - ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE)); + set_io_port_base(PCI_IOBASE); + + loongson_sysconf.early_config(); prom_init_numa_memory(); @@ -46,7 +63,45 @@ void __init prom_free_prom_memory(void) { } +static __init void reserve_pio_range(void) +{ + struct logic_pio_hwaddr *range; + + range = kzalloc(sizeof(*range), GFP_ATOMIC); + if (!range) + return; + + range->fwnode = &of_root->fwnode; + range->size = MMIO_LOWER_RESERVED; + range->hw_start = LOONGSON_PCIIO_BASE; + range->flags = LOGIC_PIO_CPU_MMIO; + + if (logic_pio_register_range(range)) { + pr_err("Failed to reserve PIO range for legacy ISA\n"); + goto free_range; + } + + if (WARN(range->io_start != 0, + "Reserved PIO range does not start from 0\n")) + goto unregister; + + /* + * i8259 would access I/O space, so mapping must be done here. + * Please remove it when all drivers can be managed by logic_pio. + */ + ioremap_page_range(PCI_IOBASE, PCI_IOBASE + MMIO_LOWER_RESERVED, + LOONGSON_PCIIO_BASE, + pgprot_device(PAGE_KERNEL)); + + return; +unregister: + logic_pio_unregister_range(range); +free_range: + kfree(range); +} + void __init arch_init_irq(void) { + reserve_pio_range(); irqchip_init(); } diff --git a/arch/mips/loongson64/pci.c b/arch/mips/loongson64/pci.c deleted file mode 100644 index e84ae20c3290..000000000000 --- a/arch/mips/loongson64/pci.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - */ -#include <linux/pci.h> - -#include <pci.h> -#include <loongson.h> -#include <boot_param.h> - -static struct resource loongson_pci_mem_resource = { - .name = "pci memory space", - .start = LOONGSON_PCI_MEM_START, - .end = LOONGSON_PCI_MEM_END, - .flags = IORESOURCE_MEM, -}; - -static struct resource loongson_pci_io_resource = { - .name = "pci io space", - .start = LOONGSON_PCI_IO_START, - .end = IO_SPACE_LIMIT, - .flags = IORESOURCE_IO, -}; - -static struct pci_controller loongson_pci_controller = { - .pci_ops = &loongson_pci_ops, - .io_resource = &loongson_pci_io_resource, - .mem_resource = &loongson_pci_mem_resource, - .mem_offset = 0x00000000UL, - .io_offset = 0x00000000UL, -}; - - -extern int sbx00_acpi_init(void); - -static int __init pcibios_init(void) -{ - - loongson_pci_controller.io_map_base = mips_io_port_base; - loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr; - loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr; - - register_pci_controller(&loongson_pci_controller); - - sbx00_acpi_init(); - - return 0; -} - -arch_initcall(pcibios_init); diff --git a/arch/mips/loongson64/rtc.c b/arch/mips/loongson64/rtc.c deleted file mode 100644 index 8d7628c0f513..000000000000 --- a/arch/mips/loongson64/rtc.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Lemote Fuloong platform support - * - * Copyright(c) 2010 Arnaud Patard <apatard@mandriva.com> - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/platform_device.h> -#include <linux/mc146818rtc.h> - -static struct resource loongson_rtc_resources[] = { - { - .start = RTC_PORT(0), - .end = RTC_PORT(1), - .flags = IORESOURCE_IO, - }, { - .start = RTC_IRQ, - .end = 
RTC_IRQ, - .flags = IORESOURCE_IRQ, - } -}; - -static struct platform_device loongson_rtc_device = { - .name = "rtc_cmos", - .id = -1, - .resource = loongson_rtc_resources, - .num_resources = ARRAY_SIZE(loongson_rtc_resources), -}; - - -static int __init loongson_rtc_platform_init(void) -{ - platform_device_register(&loongson_rtc_device); - return 0; -} - -device_initcall(loongson_rtc_platform_init); diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index e1fe8bbb377d..e744e1bee49e 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -15,7 +15,6 @@ #include <linux/kexec.h> #include <asm/processor.h> #include <asm/time.h> -#include <asm/clock.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> #include <loongson.h> diff --git a/arch/mips/loongson64/time.c b/arch/mips/loongson64/time.c index 1245f22cec84..91e842b58365 100644 --- a/arch/mips/loongson64/time.c +++ b/arch/mips/loongson64/time.c @@ -6,7 +6,7 @@ * Copyright (C) 2009 Lemote Inc. * Author: Wu Zhangjin, wuzhangjin@gmail.com */ -#include <asm/mc146818-time.h> + #include <asm/time.h> #include <asm/hpet.h> @@ -21,9 +21,3 @@ void __init plat_time_init(void) setup_hpet_timer(); #endif } - -void read_persistent_clock64(struct timespec64 *ts) -{ - ts->tv_sec = mc146818_get_cmos_time(); - ts->tv_nsec = 0; -} diff --git a/arch/mips/loongson64/vbios_quirk.c b/arch/mips/loongson64/vbios_quirk.c new file mode 100644 index 000000000000..9a29e94d3db1 --- /dev/null +++ b/arch/mips/loongson64/vbios_quirk.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/pci.h> +#include <loongson.h> + +static void pci_fixup_radeon(struct pci_dev *pdev) +{ + struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; + + if (res->start) + return; + + if (!loongson_sysconf.vgabios_addr) + return; + + pci_disable_rom(pdev); + if (res->parent) + release_resource(res); + + res->start = virt_to_phys((void *) loongson_sysconf.vgabios_addr); + res->end = res->start + 256*1024 - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | + IORESOURCE_PCI_FIXED; + + dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n", + PCI_ROM_RESOURCE, res); +} +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, 0x9615, + PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_radeon); diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 9701c89e7e14..587cf1d115e8 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -439,7 +439,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; } - /* fall through */ + fallthrough; case jr_op: /* For R6, JR already emulated in jalr_op */ if (NO_R6EMU && insn.r_format.func == jr_op) @@ -459,11 +459,11 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* fall through */ + fallthrough; case bltzl_op: if (NO_R6EMU) break; - /* fall through */ + fallthrough; case bltz_op: if ((long)regs->regs[insn.i_format.rs] < 0) *contpc = regs->cp0_epc + @@ -483,11 +483,11 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* fall through */ + fallthrough; case bgezl_op: if (NO_R6EMU) break; - /* fall through */ + fallthrough; case bgez_op: if ((long)regs->regs[insn.i_format.rs] >= 0) *contpc = regs->cp0_epc + @@ -502,12 +502,12 @@ int isBranchInstr(struct pt_regs *regs, struct 
mm_decoded_insn dec_insn, break; case jalx_op: set_isa16_mode(bit); - /* fall through */ + fallthrough; case jal_op: regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* fall through */ + fallthrough; case j_op: *contpc = regs->cp0_epc + dec_insn.pc_inc; *contpc >>= 28; @@ -519,7 +519,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case beql_op: if (NO_R6EMU) break; - /* fall through */ + fallthrough; case beq_op: if (regs->regs[insn.i_format.rs] == regs->regs[insn.i_format.rt]) @@ -534,7 +534,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case bnel_op: if (NO_R6EMU) break; - /* fall through */ + fallthrough; case bne_op: if (regs->regs[insn.i_format.rs] != regs->regs[insn.i_format.rt]) @@ -549,7 +549,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case blezl_op: if (!insn.i_format.rt && NO_R6EMU) break; - /* fall through */ + fallthrough; case blez_op: /* @@ -587,7 +587,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case bgtzl_op: if (!insn.i_format.rt && NO_R6EMU) break; - /* fall through */ + fallthrough; case bgtz_op: /* * Compact branches for R6 for the @@ -725,7 +725,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, return 1; } /* R2/R6 compatible cop1 instruction */ - /* fall through */ + fallthrough; case cop2_op: case cop1x_op: if (insn.i_format.rs == bc_op) { @@ -1217,14 +1217,14 @@ emul: case bcfl_op: if (cpu_has_mips_2_3_4_5_r) likely = 1; - /* fall through */ + fallthrough; case bcf_op: cond = !cond; break; case bctl_op: if (cpu_has_mips_2_3_4_5_r) likely = 1; - /* fall through */ + fallthrough; case bct_op: break; } diff --git a/arch/mips/math-emu/dp_add.c b/arch/mips/math-emu/dp_add.c index a8f98b8157f5..78504736be9e 100644 --- a/arch/mips/math-emu/dp_add.c +++ b/arch/mips/math-emu/dp_add.c @@ -92,8 +92,7 @@ union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; diff --git a/arch/mips/math-emu/dp_div.c b/arch/mips/math-emu/dp_div.c index 2b682e930e39..ac1ecc46248d 100644 --- a/arch/mips/math-emu/dp_div.c +++ b/arch/mips/math-emu/dp_div.c @@ -91,8 +91,7 @@ union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c index 3eda9ff7b491..126ec90bb4c7 100644 --- a/arch/mips/math-emu/dp_fmax.c +++ b/arch/mips/math-emu/dp_fmax.c @@ -93,8 +93,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; @@ -222,8 +221,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c index b3594a1704a7..35ded4c45989 100644 --- a/arch/mips/math-emu/dp_fmin.c +++ b/arch/mips/math-emu/dp_fmin.c @@ -93,8 +93,7 @@ union ieee754dp ieee754dp_fmin(union 
ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; @@ -222,8 +221,7 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c index e24ef374d828..931e66f683ca 100644 --- a/arch/mips/math-emu/dp_maddf.c +++ b/arch/mips/math-emu/dp_maddf.c @@ -150,8 +150,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); diff --git a/arch/mips/math-emu/dp_mul.c b/arch/mips/math-emu/dp_mul.c index e8a97d26472a..8a671bb7af12 100644 --- a/arch/mips/math-emu/dp_mul.c +++ b/arch/mips/math-emu/dp_mul.c @@ -89,8 +89,7 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c index 06be390ba79a..1ee38f8242fd 100644 --- a/arch/mips/math-emu/dp_sqrt.c +++ b/arch/mips/math-emu/dp_sqrt.c @@ -52,8 +52,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x) case IEEE754_CLASS_DNORM: DPDNORMX; - /* fall through */ - + fallthrough; case IEEE754_CLASS_NORM: if (xs) { /* sqrt(-x) = Nan */ @@ -130,7 +129,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x) switch (oldcsr.rm) { case FPU_CSR_RU: y.bits += 1; - /* fall through */ + fallthrough; case FPU_CSR_RN: t.bits += 1; break; diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c index f08aecefceff..08474ad2a64e 100644 --- a/arch/mips/math-emu/dp_sub.c +++ b/arch/mips/math-emu/dp_sub.c @@ -94,8 +94,7 @@ union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): /* normalize ym,ye */ DPDNORMY; diff --git a/arch/mips/math-emu/sp_add.c b/arch/mips/math-emu/sp_add.c index 9af3ec7302fb..715cd0534301 100644 --- a/arch/mips/math-emu/sp_add.c +++ b/arch/mips/math-emu/sp_add.c @@ -92,8 +92,7 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; diff --git a/arch/mips/math-emu/sp_div.c b/arch/mips/math-emu/sp_div.c index fcc285f3b48d..2bfa266fdc76 100644 --- a/arch/mips/math-emu/sp_div.c +++ b/arch/mips/math-emu/sp_div.c @@ -91,8 +91,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; diff --git a/arch/mips/math-emu/sp_fdp.c b/arch/mips/math-emu/sp_fdp.c index 9f1456109aa8..56417497c88e 100644 --- a/arch/mips/math-emu/sp_fdp.c +++ b/arch/mips/math-emu/sp_fdp.c @@ -34,8 +34,7 @@ union ieee754sp ieee754sp_fdp(union ieee754dp x) 
case IEEE754_CLASS_SNAN: x = ieee754dp_nanxcpt(x); EXPLODEXDP; - /* fall through */ - + fallthrough; case IEEE754_CLASS_QNAN: y = ieee754sp_nan_fdp(xs, xm); if (!ieee754_csr.nan2008) { diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c index 4ce1d1f8b499..3fb16a1df3b8 100644 --- a/arch/mips/math-emu/sp_fmax.c +++ b/arch/mips/math-emu/sp_fmax.c @@ -93,8 +93,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; @@ -222,8 +221,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c index 7ad867fd7de2..ad2599d4a892 100644 --- a/arch/mips/math-emu/sp_fmin.c +++ b/arch/mips/math-emu/sp_fmin.c @@ -93,8 +93,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; @@ -222,8 +221,7 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c index 1b85b1a527ac..473ee222d90c 100644 --- a/arch/mips/math-emu/sp_maddf.c +++ b/arch/mips/math-emu/sp_maddf.c @@ -119,8 +119,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); diff --git a/arch/mips/math-emu/sp_mul.c b/arch/mips/math-emu/sp_mul.c index ded17e28e8bc..26cfd63025e9 100644 --- a/arch/mips/math-emu/sp_mul.c +++ b/arch/mips/math-emu/sp_mul.c @@ -89,8 +89,7 @@ union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; diff --git a/arch/mips/math-emu/sp_sub.c b/arch/mips/math-emu/sp_sub.c index f3d26a1f162c..16c8e9ae63ed 100644 --- a/arch/mips/math-emu/sp_sub.c +++ b/arch/mips/math-emu/sp_sub.c @@ -94,8 +94,7 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index 46f483e952c8..865926a37775 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -23,7 +23,7 @@ obj-y += uasm-mips.o endif obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o -obj-$(CONFIG_64BIT) += pgtable-64.o +obj-$(CONFIG_64BIT) += ioremap64.o pgtable-64.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_DMA_NONCOHERENT) += dma-noncoherent.o diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 36a311348739..6fb83ac7c475 100644 --- a/arch/mips/mm/c-r4k.c 
+++ b/arch/mips/mm/c-r4k.c @@ -1049,7 +1049,7 @@ static inline void rm7k_erratum31(void) "cache\t%1, 0x3000(%0)\n\t" ".set pop\n" : - : "r" (addr), "i" (Index_Store_Tag_I), "i" (Fill)); + : "r" (addr), "i" (Index_Store_Tag_I), "i" (Fill_I)); } } @@ -1073,12 +1073,12 @@ static inline int alias_74k_erratum(struct cpuinfo_mips *c) if (rev <= PRID_REV_ENCODE_332(2, 4, 0)) present = 1; if (rev == PRID_REV_ENCODE_332(2, 4, 0)) - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + write_c0_config6(read_c0_config6() | MIPS_CONF6_MTI_SYND); break; case PRID_IMP_1074K: if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) { present = 1; - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + write_c0_config6(read_c0_config6() | MIPS_CONF6_MTI_SYND); } break; default: @@ -1200,7 +1200,7 @@ static void probe_pcache(void) case CPU_VR4133: write_c0_config(config & ~VR41_CONF_P4K); - /* fall through */ + fallthrough; case CPU_VR4131: /* Workaround for cache instruction bug of VR4131 */ if (c->processor_id == 0x0c80U || c->processor_id == 0x0c81U || @@ -1303,7 +1303,8 @@ static void probe_pcache(void) c->dcache.linesz; c->dcache.waybit = 0; if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >= - (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)) + (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) || + (c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R) c->options |= MIPS_CPU_PREFETCH; break; @@ -1425,7 +1426,7 @@ static void probe_pcache(void) case CPU_74K: case CPU_1074K: has_74k_erratum = alias_74k_erratum(c); - /* Fall through. */ + fallthrough; case CPU_M14KC: case CPU_M14KEC: case CPU_24K: @@ -1449,7 +1450,7 @@ static void probe_pcache(void) c->dcache.flags |= MIPS_CACHE_PINDEX; break; } - /* fall through */ + fallthrough; default: if (has_74k_erratum || c->dcache.waysize > PAGE_SIZE) c->dcache.flags |= MIPS_CACHE_ALIASES; @@ -1629,8 +1630,13 @@ static void __init loongson3_sc_init(void) scache_size = c->scache.sets * c->scache.ways * c->scache.linesz; - /* Loongson-3 has 4 cores, 1MB scache for each. 
scaches are shared */ - scache_size *= 4; + + /* Loongson-3 has 4-Scache banks, while Loongson-2K have only 2 banks */ + if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R) + scache_size *= 2; + else + scache_size *= 4; + c->scache.waybit = 0; c->scache.waysize = scache_size / c->scache.ways; pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", @@ -1703,9 +1709,10 @@ static void setup_scache(void) return; default: - if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | - MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R1 | - MIPS_CPU_ISA_M64R2 | MIPS_CPU_ISA_M64R6)) { + if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | + MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | + MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { #ifdef CONFIG_MIPS_CPU_SCACHE if (mips_sc_init ()) { scache_size = c->scache.ways * c->scache.sets * c->scache.linesz; diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 33b409391ddb..ad6df1cea866 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -36,7 +36,6 @@ EXPORT_SYMBOL_GPL(flush_icache_range); void (*local_flush_icache_range)(unsigned long start, unsigned long end); EXPORT_SYMBOL_GPL(local_flush_icache_range); void (*__flush_icache_user_range)(unsigned long start, unsigned long end); -EXPORT_SYMBOL_GPL(__flush_icache_user_range); void (*__local_flush_icache_user_range)(unsigned long start, unsigned long end); EXPORT_SYMBOL_GPL(__local_flush_icache_user_range); diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index fcea92d95d86..563c2c0d0c81 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -33,6 +33,7 @@ static inline bool cpu_needs_post_dma_flush(void) case CPU_R10000: case CPU_R12000: case CPU_BMIPS5000: + case CPU_LOONGSON2EF: return true; default: /* diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 19719e8b41a5..7c9f0c0a6cd3 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -358,17 +358,23 @@ void maar_init(void) write_c0_maari(i); back_to_back_c0_hazard(); upper = read_c0_maar(); +#ifdef CONFIG_XPA + upper |= (phys_addr_t)readx_c0_maar() << MIPS_MAARX_ADDR_SHIFT; +#endif write_c0_maari(i + 1); back_to_back_c0_hazard(); lower = read_c0_maar(); +#ifdef CONFIG_XPA + lower |= (phys_addr_t)readx_c0_maar() << MIPS_MAARX_ADDR_SHIFT; +#endif attr = lower & upper; lower = (lower & MIPS_MAAR_ADDR) << 4; upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff; pr_info(" [%d]: ", i / 2); - if (!(attr & MIPS_MAAR_VL)) { + if ((attr & MIPS_MAAR_V) != MIPS_MAAR_V) { pr_cont("disabled\n"); continue; } diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 8317f337a86e..b6dad2fd5575 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -14,94 +14,13 @@ #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mm_types.h> +#include <linux/io.h> #include <asm/cacheflush.h> -#include <asm/io.h> #include <asm/tlbflush.h> +#include <ioremap.h> -static inline void remap_area_pte(pte_t * pte, unsigned long address, - phys_addr_t size, phys_addr_t phys_addr, unsigned long flags) -{ - phys_addr_t end; - unsigned long pfn; - pgprot_t pgprot = __pgprot(_PAGE_GLOBAL | _PAGE_PRESENT | __READABLE - | __WRITEABLE | flags); - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - BUG_ON(address >= end); - pfn = phys_addr >> PAGE_SHIFT; - do { - if (!pte_none(*pte)) { - printk("remap_area_pte: page already exists\n"); - BUG(); - } - set_pte(pte, pfn_pte(pfn, 
pgprot)); - address += PAGE_SIZE; - pfn++; - pte++; - } while (address && (address < end)); -} - -static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, - phys_addr_t size, phys_addr_t phys_addr, unsigned long flags) -{ - phys_addr_t end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - phys_addr -= address; - BUG_ON(address >= end); - do { - pte_t * pte = pte_alloc_kernel(pmd, address); - if (!pte) - return -ENOMEM; - remap_area_pte(pte, address, end - address, address + phys_addr, flags); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -static int remap_area_pages(unsigned long address, phys_addr_t phys_addr, - phys_addr_t size, unsigned long flags) -{ - int error; - pgd_t * dir; - unsigned long end = address + size; - - phys_addr -= address; - dir = pgd_offset(&init_mm, address); - flush_cache_all(); - BUG_ON(address >= end); - do { - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - - error = -ENOMEM; - p4d = p4d_alloc(&init_mm, dir, address); - if (!p4d) - break; - pud = pud_alloc(&init_mm, p4d, address); - if (!pud) - break; - pmd = pmd_alloc(&init_mm, pud, address); - if (!pmd) - break; - if (remap_area_pmd(pmd, address, end - address, - phys_addr + address, flags)) - break; - error = 0; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); - flush_tlb_all(); - return error; -} +#define IS_LOW512(addr) (!((phys_addr_t)(addr) & (phys_addr_t) ~0x1fffffffULL)) +#define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1) static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, void *arg) @@ -118,27 +37,25 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, } /* - * Generic mapping function (not visible outside): - */ - -/* - * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. + * ioremap_prot - map bus memory into CPU space + * @phys_addr: bus address of the memory + * @size: size of the resource to map * - * NOTE! We need to allow non-page-aligned mappings too: we will obviously - * have to convert them into an offset in a page-aligned mapping, but the - * caller shouldn't need to know that small detail. 
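The hand-rolled remap_area_pte()/remap_area_pmd()/remap_area_pages() walker being deleted here duplicated page-table plumbing that the generic kernel already provides; the replacement ioremap_prot() in the hunk that follows only reserves a virtual area and lets ioremap_page_range() build the mappings. A minimal sketch of that generic pattern (the sketch_* names are illustrative; the helpers are the real kernel interfaces):

#include <linux/io.h>
#include <linux/vmalloc.h>

static void __iomem *sketch_ioremap(phys_addr_t phys, size_t size, pgprot_t prot)
{
	struct vm_struct *area;
	unsigned long vaddr;

	/* Reserve a kernel virtual address range for the mapping. */
	area = get_vm_area(size, VM_IOREMAP);
	if (!area)
		return NULL;
	vaddr = (unsigned long)area->addr;

	/* Build the page tables for [vaddr, vaddr + size) in one call. */
	if (ioremap_page_range(vaddr, vaddr + size, phys, prot)) {
		free_vm_area(area);
		return NULL;
	}
	return (void __iomem *)vaddr;
}

static void sketch_iounmap(const volatile void __iomem *addr)
{
	/* vunmap() undoes both the page tables and the vm_struct. */
	vunmap((void *)((unsigned long)addr & PAGE_MASK));
}

The MIPS-specific wrinkles visible in the actual patch are the plat_ioremap()/plat_iounmap() hooks and the KSEG1 short-cut for low physical addresses, which bypass this path entirely.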
+ * ioremap_prot gives the caller control over cache coherency attributes (CCA) */ - -#define IS_LOW512(addr) (!((phys_addr_t)(addr) & (phys_addr_t) ~0x1fffffffULL)) - -void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long flags) +void __iomem *ioremap_prot(phys_addr_t phys_addr, unsigned long size, + unsigned long prot_val) { + unsigned long flags = prot_val & _CACHE_MASK; unsigned long offset, pfn, last_pfn; - struct vm_struct * area; + struct vm_struct *area; phys_addr_t last_addr; - void * addr; + unsigned long vaddr; + void __iomem *cpu_addr; + + cpu_addr = plat_ioremap(phys_addr, size, flags); + if (cpu_addr) + return cpu_addr; phys_addr = fixup_bigphys_addr(phys_addr, size); @@ -181,30 +98,22 @@ void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long area = get_vm_area(size, VM_IOREMAP); if (!area) return NULL; - addr = area->addr; - if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { - vunmap(addr); + vaddr = (unsigned long)area->addr; + + flags |= _PAGE_GLOBAL | _PAGE_PRESENT | __READABLE | __WRITEABLE; + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, + __pgprot(flags))) { + free_vm_area(area); return NULL; } - return (void __iomem *) (offset + (char *)addr); + return (void __iomem *)(vaddr + offset); } +EXPORT_SYMBOL(ioremap_prot); -#define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1) - -void __iounmap(const volatile void __iomem *addr) +void iounmap(const volatile void __iomem *addr) { - struct vm_struct *p; - - if (IS_KSEG1(addr)) - return; - - p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); - if (!p) - printk(KERN_ERR "iounmap: bad address %p\n", addr); - - kfree(p); + if (!plat_iounmap(addr) && !IS_KSEG1(addr)) + vunmap((void *)((unsigned long)addr & PAGE_MASK)); } - -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(__iounmap); +EXPORT_SYMBOL(iounmap); diff --git a/arch/mips/mm/ioremap64.c b/arch/mips/mm/ioremap64.c new file mode 100644 index 000000000000..15e7820d6a5f --- /dev/null +++ b/arch/mips/mm/ioremap64.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/io.h> +#include <ioremap.h> + +void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, + unsigned long prot_val) +{ + unsigned long flags = prot_val & _CACHE_MASK; + u64 base = (flags == _CACHE_UNCACHED ? IO_BASE : UNCAC_BASE); + void __iomem *addr; + + addr = plat_ioremap(offset, size, flags); + if (!addr) + addr = (void __iomem *)(unsigned long)(base + offset); + return addr; +} +EXPORT_SYMBOL(ioremap_prot); + +void iounmap(const volatile void __iomem *addr) +{ + plat_iounmap(addr); +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index dbdbfe5d8408..eedad47df24f 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -194,9 +194,10 @@ static inline int __init mips_sc_probe(void) return mips_sc_probe_cm3(); /* Ignore anything but MIPSxx processors */ - if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | - MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R1 | - MIPS_CPU_ISA_M64R2 | MIPS_CPU_ISA_M64R6))) + if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | + MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | + MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))) return 0; /* Does this MIPS32/MIPS64 CPU have a config2 register? 
*/ diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index da407cdc2135..38c204204529 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -576,7 +576,7 @@ void build_tlb_write_entry(u32 **p, struct uasm_label **l, case CPU_R5500: if (m4kc_tlbp_war()) uasm_i_nop(p); - /* fall through */ + fallthrough; case CPU_ALCHEMY: tlbw(p); break; diff --git a/arch/mips/mti-malta/Platform b/arch/mips/mti-malta/Platform index 2cc72c9b38e3..41e0d2a2d325 100644 --- a/arch/mips/mti-malta/Platform +++ b/arch/mips/mti-malta/Platform @@ -1,7 +1,6 @@ # # MIPS Malta board # -platform-$(CONFIG_MIPS_MALTA) += mti-malta/ cflags-$(CONFIG_MIPS_MALTA) += -I$(srctree)/arch/mips/include/asm/mach-malta ifdef CONFIG_KVM_GUEST load-$(CONFIG_MIPS_MALTA) += 0x0000000040100000 diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c index ff2c1d809538..893af377aacc 100644 --- a/arch/mips/mti-malta/malta-init.c +++ b/arch/mips/mti-malta/malta-init.c @@ -90,24 +90,24 @@ static void __init console_config(void) static void __init mips_nmi_setup(void) { void *base; - extern char except_vec_nmi; + extern char except_vec_nmi[]; base = cpu_has_veic ? (void *)(CAC_BASE + 0xa80) : (void *)(CAC_BASE + 0x380); - memcpy(base, &except_vec_nmi, 0x80); + memcpy(base, except_vec_nmi, 0x80); flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } static void __init mips_ejtag_setup(void) { void *base; - extern char except_vec_ejtag_debug; + extern char except_vec_ejtag_debug[]; base = cpu_has_veic ? (void *)(CAC_BASE + 0xa00) : (void *)(CAC_BASE + 0x300); - memcpy(base, &except_vec_ejtag_debug, 0x80); + memcpy(base, except_vec_ejtag_debug, 0x80); flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } diff --git a/arch/mips/netlogic/Platform b/arch/mips/netlogic/Platform index fb8eb4c0c6ec..4195a097f5f2 100644 --- a/arch/mips/netlogic/Platform +++ b/arch/mips/netlogic/Platform @@ -13,5 +13,4 @@ cflags-$(CONFIG_CPU_XLP) += $(call cc-option,-march=xlp,-march=mips64r2) # # NETLOGIC processor support # -platform-$(CONFIG_NLM_COMMON) += netlogic/ load-$(CONFIG_NLM_COMMON) += 0xffffffff80100000 diff --git a/arch/mips/netlogic/xlr/fmn.c b/arch/mips/netlogic/xlr/fmn.c index d7db1533889a..f90303f31967 100644 --- a/arch/mips/netlogic/xlr/fmn.c +++ b/arch/mips/netlogic/xlr/fmn.c @@ -103,7 +103,7 @@ static irqreturn_t fmn_message_handler(int irq, void *data) mflags = nlm_cop2_enable_irqsave(); } } - }; + } /* Enable message ring intr, to any thread in core */ nlm_fmn_setup_intr(irq, (1 << nlm_threads_per_core) - 1); nlm_cop2_disable_irqrestore(mflags); diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index 03db268cba5c..d3996c4c6440 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -110,7 +110,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) case CPU_LOONGSON64: lmodel = &op_model_loongson3_ops; break; - }; + } /* * Always set the backtrace. 
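One small but recurring cleanup in the malta-init.c hunk above: the exception stubs change from `extern char except_vec_nmi;` to `extern char except_vec_nmi[];`. Both declarations bind to the same assembly-provided symbol, but the incomplete-array form tells the compiler the symbol names a blob of unknown size rather than a single byte, so copying 0x80 bytes out of it is no longer an apparent out-of-bounds access. A small illustration with a hypothetical symbol name:

#include <linux/string.h>
#include <asm/cacheflush.h>

extern char my_vec_stub[];	/* hypothetical label defined in an assembly file */

static void install_vec(void *base)
{
	/* memcpy() from an incomplete array is well-formed; copying 0x80
	 * bytes from the address of a lone 'char' would look like reading
	 * past the end of a one-byte object. */
	memcpy(base, my_vec_stub, 0x80);
	flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
}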
This allows unsupported CPU types to still diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c index a537bf98912c..1493c49ca47a 100644 --- a/arch/mips/oprofile/op_model_mipsxx.c +++ b/arch/mips/oprofile/op_model_mipsxx.c @@ -172,15 +172,15 @@ static void mipsxx_cpu_setup(void *args) case 4: w_c0_perfctrl3(0); w_c0_perfcntr3(reg.counter[3]); - /* fall through */ + fallthrough; case 3: w_c0_perfctrl2(0); w_c0_perfcntr2(reg.counter[2]); - /* fall through */ + fallthrough; case 2: w_c0_perfctrl1(0); w_c0_perfcntr1(reg.counter[1]); - /* fall through */ + fallthrough; case 1: w_c0_perfctrl0(0); w_c0_perfcntr0(reg.counter[0]); @@ -198,13 +198,13 @@ static void mipsxx_cpu_start(void *args) switch (counters) { case 4: w_c0_perfctrl3(WHAT | reg.control[3]); - /* fall through */ + fallthrough; case 3: w_c0_perfctrl2(WHAT | reg.control[2]); - /* fall through */ + fallthrough; case 2: w_c0_perfctrl1(WHAT | reg.control[1]); - /* fall through */ + fallthrough; case 1: w_c0_perfctrl0(WHAT | reg.control[0]); } @@ -221,13 +221,13 @@ static void mipsxx_cpu_stop(void *args) switch (counters) { case 4: w_c0_perfctrl3(0); - /* fall through */ + fallthrough; case 3: w_c0_perfctrl2(0); - /* fall through */ + fallthrough; case 2: w_c0_perfctrl1(0); - /* fall through */ + fallthrough; case 1: w_c0_perfctrl0(0); } @@ -245,7 +245,7 @@ static int mipsxx_perfcount_handler(void) switch (counters) { #define HANDLE_COUNTER(n) \ - /* fall through */ \ + fallthrough; \ case n + 1: \ control = r_c0_perfctrl ## n(); \ counter = r_c0_perfcntr ## n(); \ @@ -307,15 +307,15 @@ static void reset_counters(void *arg) case 4: w_c0_perfctrl3(0); w_c0_perfcntr3(0); - /* fall through */ + fallthrough; case 3: w_c0_perfctrl2(0); w_c0_perfcntr2(0); - /* fall through */ + fallthrough; case 2: w_c0_perfctrl1(0); w_c0_perfcntr1(0); - /* fall through */ + fallthrough; case 1: w_c0_perfctrl0(0); w_c0_perfcntr0(0); diff --git a/arch/mips/paravirt/Platform b/arch/mips/paravirt/Platform index 7e76ef25ea17..0b857580dfdd 100644 --- a/arch/mips/paravirt/Platform +++ b/arch/mips/paravirt/Platform @@ -1,7 +1,6 @@ # # Generic para-virtualized guest. # -platform-$(CONFIG_MIPS_PARAVIRT) += paravirt/ cflags-$(CONFIG_MIPS_PARAVIRT) += \ -I$(srctree)/arch/mips/include/asm/mach-paravirt diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index 342ce10ef593..0f68d6849978 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -13,10 +13,8 @@ obj-$(CONFIG_PCI_DRIVERS_GENERIC)+= pci-generic.o obj-$(CONFIG_MIPS_BONITO64) += ops-bonito64.o obj-$(CONFIG_PCI_GT64XXX_PCI0) += ops-gt64xxx_pci0.o obj-$(CONFIG_MIPS_MSC) += ops-msc.o -obj-$(CONFIG_MIPS_NILE4) += ops-nile4.o obj-$(CONFIG_SOC_TX3927) += ops-tx3927.o obj-$(CONFIG_PCI_VR41XX) += ops-vr41xx.o pci-vr41xx.o -obj-$(CONFIG_NEC_MARKEINS) += ops-emma2rh.o pci-emma2rh.o fixup-emma2rh.o obj-$(CONFIG_PCI_TX4927) += ops-tx4927.o obj-$(CONFIG_BCM47XX) += pci-bcm47xx.o obj-$(CONFIG_BCM63XX) += pci-bcm63xx.o fixup-bcm63xx.o \ @@ -31,15 +29,10 @@ obj-$(CONFIG_PCI_XTALK_BRIDGE) += pci-xtalk-bridge.o # These are still pretty much in the old state, watch, go blind. 
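A large share of this series converts `/* fall through */` comments into the `fallthrough;` pseudo-keyword, as in the math-emu, c-r4k.c, tlbex.c and op_model_mipsxx.c hunks above. The keyword comes from include/linux/compiler_attributes.h and expands to __attribute__((__fallthrough__)) where the compiler supports it (and to an empty statement otherwise), so -Wimplicit-fallthrough can tell deliberate fall-through from a forgotten break. A compact sketch of the idiom (reset_counter() is a made-up helper; the real code pokes the perf control registers directly):

#include <linux/compiler_attributes.h>	/* pulled in automatically in kernel builds */

static void reset_counter(int n)
{
	/* Would clear performance counter n in a real implementation. */
}

static void reset_counters_sketch(int counters)
{
	switch (counters) {
	case 3:
		reset_counter(2);
		fallthrough;	/* intentionally also reset the lower counters */
	case 2:
		reset_counter(1);
		fallthrough;
	case 1:
		reset_counter(0);
		break;
	default:
		break;
	}
}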
# obj-$(CONFIG_ATH79) += fixup-ath79.o -obj-$(CONFIG_LASAT) += pci-lasat.o obj-$(CONFIG_MIPS_COBALT) += fixup-cobalt.o obj-$(CONFIG_LEMOTE_FULOONG2E) += fixup-fuloong2e.o ops-loongson2.o obj-$(CONFIG_LEMOTE_MACH2F) += fixup-lemote2f.o ops-loongson2.o -obj-$(CONFIG_MACH_LOONGSON64) += fixup-loongson3.o ops-loongson3.o obj-$(CONFIG_MIPS_MALTA) += fixup-malta.o pci-malta.o -obj-$(CONFIG_PMC_MSP7120_GW) += fixup-pmcmsp.o ops-pmcmsp.o -obj-$(CONFIG_PMC_MSP7120_EVAL) += fixup-pmcmsp.o ops-pmcmsp.o -obj-$(CONFIG_PMC_MSP7120_FPGA) += fixup-pmcmsp.o ops-pmcmsp.o obj-$(CONFIG_SGI_IP27) += pci-ip27.o obj-$(CONFIG_SGI_IP32) += fixup-ip32.o ops-mace.o pci-ip32.o obj-$(CONFIG_SIBYTE_SB1250) += fixup-sb1250.o pci-sb1250.o diff --git a/arch/mips/pci/fixup-emma2rh.c b/arch/mips/pci/fixup-emma2rh.c deleted file mode 100644 index 2541f9bc12de..000000000000 --- a/arch/mips/pci/fixup-emma2rh.c +++ /dev/null @@ -1,84 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - * - * This file is based on the arch/mips/ddb5xxx/ddb5477/pci.c - * - * Copyright 2001 MontaVista Software Inc. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/pci.h> - -#include <asm/bootinfo.h> - -#include <asm/emma/emma2rh.h> - -#define EMMA2RH_PCI_HOST_SLOT 0x09 -#define EMMA2RH_USB_SLOT 0x03 -#define PCI_DEVICE_ID_NEC_EMMA2RH 0x014b /* EMMA2RH PCI Host */ - -/* - * we fix up irqs based on the slot number. - * The first entry is at AD:11. - * Fortunately this works because, although we have two pci buses, - * they all have different slot numbers (except for rockhopper slot 20 - * which is handled below). - * - */ - -#define MAX_SLOT_NUM 10 -static unsigned char irq_map[][5] = { - [3] = {0, MARKEINS_PCI_IRQ_INTB, MARKEINS_PCI_IRQ_INTC, - MARKEINS_PCI_IRQ_INTD, 0,}, - [4] = {0, MARKEINS_PCI_IRQ_INTA, 0, 0, 0,}, - [5] = {0, 0, 0, 0, 0,}, - [6] = {0, MARKEINS_PCI_IRQ_INTC, MARKEINS_PCI_IRQ_INTD, - MARKEINS_PCI_IRQ_INTA, MARKEINS_PCI_IRQ_INTB,}, -}; - -static void nec_usb_controller_fixup(struct pci_dev *dev) -{ - if (PCI_SLOT(dev->devfn) == EMMA2RH_USB_SLOT) - /* on board USB controller configuration */ - pci_write_config_dword(dev, 0xe4, 1 << 5); -} - -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, - nec_usb_controller_fixup); - -/* - * Prevent the PCI layer from seeing the resources allocated to this device - * if it is the host bridge by marking it as such. These resources are of - * no consequence to the PCI layer (they are handled elsewhere). - */ -static void emma2rh_pci_host_fixup(struct pci_dev *dev) -{ - int i; - - if (PCI_SLOT(dev->devfn) == EMMA2RH_PCI_HOST_SLOT) { - dev->class &= 0xff; - dev->class |= PCI_CLASS_BRIDGE_HOST << 8; - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - dev->resource[i].start = 0; - dev->resource[i].end = 0; - dev->resource[i].flags = 0; - } - } -} - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_EMMA2RH, - emma2rh_pci_host_fixup); - -int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - return irq_map[slot][pin]; -} - -/* Do platform specific device initialization at pci_enable_device() time */ -int pcibios_plat_dev_init(struct pci_dev *dev) -{ - return 0; -} diff --git a/arch/mips/pci/fixup-loongson3.c b/arch/mips/pci/fixup-loongson3.c deleted file mode 100644 index 8a741c2c6685..000000000000 --- a/arch/mips/pci/fixup-loongson3.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * fixup-loongson3.c - * - * Copyright (C) 2012 Lemote, Inc. 
- * Author: Xiang Yu, xiangy@lemote.com - * Chen Huacai, chenhc@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include <linux/pci.h> -#include <boot_param.h> - -static void print_fixup_info(const struct pci_dev *pdev) -{ - dev_info(&pdev->dev, "Device %x:%x, irq %d\n", - pdev->vendor, pdev->device, pdev->irq); -} - -int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - print_fixup_info(dev); - return dev->irq; -} - -static void pci_fixup_radeon(struct pci_dev *pdev) -{ - struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; - - if (res->start) - return; - - if (!loongson_sysconf.vgabios_addr) - return; - - pci_disable_rom(pdev); - if (res->parent) - release_resource(res); - - res->start = virt_to_phys((void *) loongson_sysconf.vgabios_addr); - res->end = res->start + 256*1024 - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | - IORESOURCE_PCI_FIXED; - - dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n", - PCI_ROM_RESOURCE, res); -} - -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID, - PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_radeon); - -/* Do platform specific device initialization at pci_enable_device() time */ -int pcibios_plat_dev_init(struct pci_dev *dev) -{ - return 0; -} diff --git a/arch/mips/pci/fixup-pmcmsp.c b/arch/mips/pci/fixup-pmcmsp.c deleted file mode 100644 index 4ad2ef02087b..000000000000 --- a/arch/mips/pci/fixup-pmcmsp.c +++ /dev/null @@ -1,216 +0,0 @@ -/* - * PMC-Sierra MSP board specific pci fixups. - * - * Copyright 2001 MontaVista Software Inc. - * Copyright 2005-2007 PMC-Sierra, Inc - * - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifdef CONFIG_PCI - -#include <linux/types.h> -#include <linux/pci.h> -#include <linux/kernel.h> -#include <linux/init.h> - -#include <asm/byteorder.h> - -#include <msp_pci.h> -#include <msp_cic_int.h> - -/* PCI interrupt pins */ -#define IRQ4 MSP_INT_EXT4 -#define IRQ5 MSP_INT_EXT5 -#define IRQ6 MSP_INT_EXT6 - -#if defined(CONFIG_PMC_MSP7120_GW) -/* Garibaldi Board IRQ wiring to PCI slots */ -static char irq_tab[][5] = { - /* INTA INTB INTC INTD */ - {0, 0, 0, 0, 0 }, /* (AD[0]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[1]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[2]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[3]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[4]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[5]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[6]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[7]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[8]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[9]): Unused */ - {0, 0, 0, 0, 0 }, /* 0 (AD[10]): Unused */ - {0, 0, 0, 0, 0 }, /* 1 (AD[11]): Unused */ - {0, 0, 0, 0, 0 }, /* 2 (AD[12]): Unused */ - {0, 0, 0, 0, 0 }, /* 3 (AD[13]): Unused */ - {0, 0, 0, 0, 0 }, /* 4 (AD[14]): Unused */ - {0, 0, 0, 0, 0 }, /* 5 (AD[15]): Unused */ - {0, 0, 0, 0, 0 }, /* 6 (AD[16]): Unused */ - {0, 0, 0, 0, 0 }, /* 7 (AD[17]): Unused */ - {0, 0, 0, 0, 0 }, /* 8 (AD[18]): Unused */ - {0, 0, 0, 0, 0 }, /* 9 (AD[19]): Unused */ - {0, 0, 0, 0, 0 }, /* 10 (AD[20]): Unused */ - {0, 0, 0, 0, 0 }, /* 11 (AD[21]): Unused */ - {0, 0, 0, 0, 0 }, /* 12 (AD[22]): Unused */ - {0, 0, 0, 0, 0 }, /* 13 (AD[23]): Unused */ - {0, 0, 0, 0, 0 }, /* 14 (AD[24]): Unused */ - {0, 0, 0, 0, 0 }, /* 15 (AD[25]): Unused */ - {0, 0, 0, 0, 0 }, /* 16 (AD[26]): Unused */ - {0, 0, 0, 0, 0 }, /* 17 (AD[27]): Unused */ - {0, IRQ4, IRQ4, 0, 0 }, /* 18 (AD[28]): slot 0 */ - {0, 0, 0, 0, 0 }, /* 19 (AD[29]): Unused */ - {0, IRQ5, IRQ5, 0, 0 }, /* 20 (AD[30]): slot 1 */ - {0, IRQ6, IRQ6, 0, 0 } /* 21 (AD[31]): slot 2 */ -}; - -#elif defined(CONFIG_PMC_MSP7120_EVAL) - -/* MSP7120 Eval Board IRQ wiring to PCI slots */ -static char irq_tab[][5] = { - /* INTA INTB INTC INTD */ - {0, 0, 0, 0, 0 }, /* (AD[0]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[1]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[2]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[3]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[4]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[5]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[6]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[7]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[8]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[9]): Unused */ - {0, 0, 0, 0, 0 }, /* 0 (AD[10]): Unused */ - {0, 0, 0, 0, 0 }, /* 1 (AD[11]): Unused */ - {0, 0, 0, 0, 0 }, /* 2 (AD[12]): Unused */ - {0, 0, 0, 0, 0 }, /* 3 (AD[13]): Unused */ - {0, 0, 0, 0, 0 }, /* 4 (AD[14]): Unused */ - {0, 0, 0, 0, 0 }, /* 5 (AD[15]): Unused */ - {0, IRQ6, IRQ6, 0, 0 }, /* 6 (AD[16]): slot 3 (mini) */ - {0, IRQ5, IRQ5, 0, 0 }, /* 7 
(AD[17]): slot 2 (mini) */ - {0, IRQ4, IRQ4, IRQ4, IRQ4}, /* 8 (AD[18]): slot 0 (PCI) */ - {0, IRQ5, IRQ5, IRQ5, IRQ5}, /* 9 (AD[19]): slot 1 (PCI) */ - {0, 0, 0, 0, 0 }, /* 10 (AD[20]): Unused */ - {0, 0, 0, 0, 0 }, /* 11 (AD[21]): Unused */ - {0, 0, 0, 0, 0 }, /* 12 (AD[22]): Unused */ - {0, 0, 0, 0, 0 }, /* 13 (AD[23]): Unused */ - {0, 0, 0, 0, 0 }, /* 14 (AD[24]): Unused */ - {0, 0, 0, 0, 0 }, /* 15 (AD[25]): Unused */ - {0, 0, 0, 0, 0 }, /* 16 (AD[26]): Unused */ - {0, 0, 0, 0, 0 }, /* 17 (AD[27]): Unused */ - {0, 0, 0, 0, 0 }, /* 18 (AD[28]): Unused */ - {0, 0, 0, 0, 0 }, /* 19 (AD[29]): Unused */ - {0, 0, 0, 0, 0 }, /* 20 (AD[30]): Unused */ - {0, 0, 0, 0, 0 } /* 21 (AD[31]): Unused */ -}; - -#else - -/* Unknown board -- don't assign any IRQs */ -static char irq_tab[][5] = { - /* INTA INTB INTC INTD */ - {0, 0, 0, 0, 0 }, /* (AD[0]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[1]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[2]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[3]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[4]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[5]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[6]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[7]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[8]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[9]): Unused */ - {0, 0, 0, 0, 0 }, /* 0 (AD[10]): Unused */ - {0, 0, 0, 0, 0 }, /* 1 (AD[11]): Unused */ - {0, 0, 0, 0, 0 }, /* 2 (AD[12]): Unused */ - {0, 0, 0, 0, 0 }, /* 3 (AD[13]): Unused */ - {0, 0, 0, 0, 0 }, /* 4 (AD[14]): Unused */ - {0, 0, 0, 0, 0 }, /* 5 (AD[15]): Unused */ - {0, 0, 0, 0, 0 }, /* 6 (AD[16]): Unused */ - {0, 0, 0, 0, 0 }, /* 7 (AD[17]): Unused */ - {0, 0, 0, 0, 0 }, /* 8 (AD[18]): Unused */ - {0, 0, 0, 0, 0 }, /* 9 (AD[19]): Unused */ - {0, 0, 0, 0, 0 }, /* 10 (AD[20]): Unused */ - {0, 0, 0, 0, 0 }, /* 11 (AD[21]): Unused */ - {0, 0, 0, 0, 0 }, /* 12 (AD[22]): Unused */ - {0, 0, 0, 0, 0 }, /* 13 (AD[23]): Unused */ - {0, 0, 0, 0, 0 }, /* 14 (AD[24]): Unused */ - {0, 0, 0, 0, 0 }, /* 15 (AD[25]): Unused */ - {0, 0, 0, 0, 0 }, /* 16 (AD[26]): Unused */ - {0, 0, 0, 0, 0 }, /* 17 (AD[27]): Unused */ - {0, 0, 0, 0, 0 }, /* 18 (AD[28]): Unused */ - {0, 0, 0, 0, 0 }, /* 19 (AD[29]): Unused */ - {0, 0, 0, 0, 0 }, /* 20 (AD[30]): Unused */ - {0, 0, 0, 0, 0 } /* 21 (AD[31]): Unused */ -}; -#endif - -/***************************************************************************** - * - * FUNCTION: pcibios_plat_dev_init - * _________________________________________________________________________ - * - * DESCRIPTION: Perform platform specific device initialization at - * pci_enable_device() time. - * None are needed for the MSP7120 PCI Controller. - * - * INPUTS: dev - structure describing the PCI device - * - * OUTPUTS: none - * - * RETURNS: PCIBIOS_SUCCESSFUL - * - ****************************************************************************/ -int pcibios_plat_dev_init(struct pci_dev *dev) -{ - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: pcibios_map_irq - * _________________________________________________________________________ - * - * DESCRIPTION: Perform board supplied PCI IRQ mapping routine. - * - * INPUTS: dev - unused - * slot - PCI slot. Identified by which bit of the AD[] bus - * drives the IDSEL line. AD[10] is 0, AD[31] is - * slot 21. - * pin - numbered using the scheme of the PCI_INTERRUPT_PIN - * field of the config header. 
- * - * OUTPUTS: none - * - * RETURNS: IRQ number - * - ****************************************************************************/ -int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ -#if !defined(CONFIG_PMC_MSP7120_GW) && !defined(CONFIG_PMC_MSP7120_EVAL) - printk(KERN_WARNING "PCI: unknown board, no PCI IRQs assigned.\n"); -#endif - printk(KERN_WARNING "PCI: irq_tab returned %d for slot=%d pin=%d\n", - irq_tab[slot][pin], slot, pin); - - return irq_tab[slot][pin]; -} - -#endif /* CONFIG_PCI */ diff --git a/arch/mips/pci/fixup-sni.c b/arch/mips/pci/fixup-sni.c index adb9a58641e8..de012f8bd8c3 100644 --- a/arch/mips/pci/fixup-sni.c +++ b/arch/mips/pci/fixup-sni.c @@ -151,8 +151,7 @@ int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) case SNI_BRD_PCI_MTOWER: if (is_rm300_revd()) return irq_tab_rm300d[slot][pin]; - /* fall through */ - + fallthrough; case SNI_BRD_PCI_DESKTOP: return irq_tab_rm200[slot][pin]; diff --git a/arch/mips/pci/ops-bcm63xx.c b/arch/mips/pci/ops-bcm63xx.c index 925c72348fb6..dc6dc2741272 100644 --- a/arch/mips/pci/ops-bcm63xx.c +++ b/arch/mips/pci/ops-bcm63xx.c @@ -474,7 +474,7 @@ static int bcm63xx_pcie_can_access(struct pci_bus *bus, int devfn) if (PCI_SLOT(devfn) == 0) return bcm_pcie_readl(PCIE_DLSTATUS_REG) & DLSTATUS_PHYLINKUP; - /* else, fall through */ + fallthrough; default: return false; } diff --git a/arch/mips/pci/ops-emma2rh.c b/arch/mips/pci/ops-emma2rh.c deleted file mode 100644 index 65f47344536c..000000000000 --- a/arch/mips/pci/ops-emma2rh.c +++ /dev/null @@ -1,167 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - * - * This file is based on the arch/mips/pci/ops-vr41xx.c - * - * Copyright 2001 MontaVista Software Inc. 
- */ - -#include <linux/pci.h> -#include <linux/kernel.h> -#include <linux/types.h> - -#include <asm/addrspace.h> - -#include <asm/emma/emma2rh.h> - -#define RTABORT (0x1<<9) -#define RMABORT (0x1<<10) -#define EMMA2RH_PCI_SLOT_NUM 9 /* 0000:09.0 is final PCI device */ - -/* - * access config space - */ - -static int check_args(struct pci_bus *bus, u32 devfn, u32 * bus_num) -{ - /* check if the bus is top-level */ - if (bus->parent != NULL) - *bus_num = bus->number; - else - *bus_num = 0; - - if (*bus_num == 0) { - /* Type 0 */ - if (PCI_SLOT(devfn) >= 10) - return PCIBIOS_DEVICE_NOT_FOUND; - } else { - /* Type 1 */ - if ((*bus_num >= 64) || (PCI_SLOT(devfn) >= 16)) - return PCIBIOS_DEVICE_NOT_FOUND; - } - return 0; -} - -static inline int set_pci_configuration_address(unsigned char bus_num, - unsigned int devfn, int where) -{ - u32 config_win0; - - emma2rh_out32(EMMA2RH_PCI_INT, ~RMABORT); - if (bus_num == 0) - /* - * Type 0 configuration - */ - config_win0 = (1 << (22 + PCI_SLOT(devfn))) | (5 << 9); - else - /* - * Type 1 configuration - */ - config_win0 = (bus_num << 26) | (PCI_SLOT(devfn) << 22) | - (1 << 15) | (5 << 9); - - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, config_win0); - - return 0; -} - -static int pci_config_read(struct pci_bus *bus, unsigned int devfn, int where, - int size, uint32_t * val) -{ - u32 bus_num; - u32 base = KSEG1ADDR(EMMA2RH_PCI_CONFIG_BASE); - u32 backup_win0; - u32 data; - - *val = 0xffffffffU; - - if (check_args(bus, devfn, &bus_num) == PCIBIOS_DEVICE_NOT_FOUND) - return PCIBIOS_DEVICE_NOT_FOUND; - - backup_win0 = emma2rh_in32(EMMA2RH_PCI_IWIN0_CTR); - - if (set_pci_configuration_address(bus_num, devfn, where) < 0) - return PCIBIOS_DEVICE_NOT_FOUND; - - data = - *(volatile u32 *)(base + (PCI_FUNC(devfn) << 8) + - (where & 0xfffffffc)); - - switch (size) { - case 1: - *val = (data >> ((where & 3) << 3)) & 0xffU; - break; - case 2: - *val = (data >> ((where & 2) << 3)) & 0xffffU; - break; - case 4: - *val = data; - break; - default: - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0); - return PCIBIOS_FUNC_NOT_SUPPORTED; - } - - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0); - - if (emma2rh_in32(EMMA2RH_PCI_INT) & RMABORT) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -static int pci_config_write(struct pci_bus *bus, unsigned int devfn, int where, - int size, u32 val) -{ - u32 bus_num; - u32 base = KSEG1ADDR(EMMA2RH_PCI_CONFIG_BASE); - u32 backup_win0; - u32 data; - int shift; - - if (check_args(bus, devfn, &bus_num) == PCIBIOS_DEVICE_NOT_FOUND) - return PCIBIOS_DEVICE_NOT_FOUND; - - backup_win0 = emma2rh_in32(EMMA2RH_PCI_IWIN0_CTR); - - if (set_pci_configuration_address(bus_num, devfn, where) < 0) - return PCIBIOS_DEVICE_NOT_FOUND; - - /* read modify write */ - data = - *(volatile u32 *)(base + (PCI_FUNC(devfn) << 8) + - (where & 0xfffffffc)); - - switch (size) { - case 1: - shift = (where & 3) << 3; - data &= ~(0xffU << shift); - data |= ((val & 0xffU) << shift); - break; - case 2: - shift = (where & 2) << 3; - data &= ~(0xffffU << shift); - data |= ((val & 0xffffU) << shift); - break; - case 4: - data = val; - break; - default: - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0); - return PCIBIOS_FUNC_NOT_SUPPORTED; - } - *(volatile u32 *)(base + (PCI_FUNC(devfn) << 8) + - (where & 0xfffffffc)) = data; - - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0); - if (emma2rh_in32(EMMA2RH_PCI_INT) & RMABORT) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -struct pci_ops emma2rh_pci_ops = { - .read = 
pci_config_read, - .write = pci_config_write, -}; diff --git a/arch/mips/pci/ops-loongson3.c b/arch/mips/pci/ops-loongson3.c deleted file mode 100644 index 2f6ad36bdea6..000000000000 --- a/arch/mips/pci/ops-loongson3.c +++ /dev/null @@ -1,116 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/types.h> -#include <linux/pci.h> -#include <linux/kernel.h> - -#include <asm/mips-boards/bonito64.h> - -#include <loongson.h> - -#define PCI_ACCESS_READ 0 -#define PCI_ACCESS_WRITE 1 - -#define HT1LO_PCICFG_BASE 0x1a000000 -#define HT1LO_PCICFG_BASE_TP1 0x1b000000 - -static int loongson3_pci_config_access(unsigned char access_type, - struct pci_bus *bus, unsigned int devfn, - int where, u32 *data) -{ - unsigned char busnum = bus->number; - int function = PCI_FUNC(devfn); - int device = PCI_SLOT(devfn); - int reg = where & ~3; - void *addrp; - u64 addr; - - if (where < PCI_CFG_SPACE_SIZE) { /* standard config */ - addr = (busnum << 16) | (device << 11) | (function << 8) | reg; - if (busnum == 0) { - if (device > 31) - return PCIBIOS_DEVICE_NOT_FOUND; - addrp = (void *)TO_UNCAC(HT1LO_PCICFG_BASE | addr); - } else { - addrp = (void *)TO_UNCAC(HT1LO_PCICFG_BASE_TP1 | addr); - } - } else if (where < PCI_CFG_SPACE_EXP_SIZE) { /* extended config */ - struct pci_dev *rootdev; - - rootdev = pci_get_domain_bus_and_slot(0, 0, 0); - if (!rootdev) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr = pci_resource_start(rootdev, 3); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr |= busnum << 20 | device << 15 | function << 12 | reg; - addrp = (void *)TO_UNCAC(addr); - } else { - return PCIBIOS_DEVICE_NOT_FOUND; - } - - if (access_type == PCI_ACCESS_WRITE) - writel(*data, addrp); - else { - *data = readl(addrp); - if (*data == 0xffffffff) { - *data = -1; - return PCIBIOS_DEVICE_NOT_FOUND; - } - } - return PCIBIOS_SUCCESSFUL; -} - -static int loongson3_pci_pcibios_read(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 *val) -{ - u32 data = 0; - int ret = loongson3_pci_config_access(PCI_ACCESS_READ, - bus, devfn, where, &data); - - if (ret != PCIBIOS_SUCCESSFUL) - return ret; - - if (size == 1) - *val = (data >> ((where & 3) << 3)) & 0xff; - else if (size == 2) - *val = (data >> ((where & 3) << 3)) & 0xffff; - else - *val = data; - - return PCIBIOS_SUCCESSFUL; -} - -static int loongson3_pci_pcibios_write(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 val) -{ - u32 data = 0; - int ret; - - if (size == 4) - data = val; - else { - ret = loongson3_pci_config_access(PCI_ACCESS_READ, - bus, devfn, where, &data); - if (ret != PCIBIOS_SUCCESSFUL) - return ret; - - if (size == 1) - data = (data & ~(0xff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - else if (size == 2) - data = (data & ~(0xffff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - } - - ret = loongson3_pci_config_access(PCI_ACCESS_WRITE, - bus, devfn, where, &data); - - return ret; -} - -struct pci_ops loongson_pci_ops = { - .read = loongson3_pci_pcibios_read, - .write = loongson3_pci_pcibios_write -}; diff --git a/arch/mips/pci/ops-nile4.c b/arch/mips/pci/ops-nile4.c deleted file mode 100644 index b00658d19116..000000000000 --- a/arch/mips/pci/ops-nile4.c +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/pci.h> -#include <asm/bootinfo.h> - -#include <asm/lasat/lasat.h> -#include <asm/nile4.h> - -#define PCI_ACCESS_READ 0 -#define PCI_ACCESS_WRITE 1 - -#define LO(reg) (reg / 4) -#define HI(reg) (reg / 4 + 1) - -volatile unsigned 
long *const vrc_pciregs = (void *) Vrc5074_BASE; - -static int nile4_pcibios_config_access(unsigned char access_type, - struct pci_bus *bus, unsigned int devfn, int where, u32 *val) -{ - unsigned char busnum = bus->number; - u32 adr, mask, err; - - if ((busnum == 0) && (PCI_SLOT(devfn) > 8)) - /* The addressing scheme chosen leaves room for just - * 8 devices on the first busnum (besides the PCI - * controller itself) */ - return PCIBIOS_DEVICE_NOT_FOUND; - - if ((busnum == 0) && (devfn == PCI_DEVFN(0, 0))) { - /* Access controller registers directly */ - if (access_type == PCI_ACCESS_WRITE) { - vrc_pciregs[(0x200 + where) >> 2] = *val; - } else { - *val = vrc_pciregs[(0x200 + where) >> 2]; - } - return PCIBIOS_SUCCESSFUL; - } - - /* Temporarily map PCI Window 1 to config space */ - mask = vrc_pciregs[LO(NILE4_PCIINIT1)]; - vrc_pciregs[LO(NILE4_PCIINIT1)] = 0x0000001a | (busnum ? 0x200 : 0); - - /* Clear PCI Error register. This also clears the Error Type - * bits in the Control register */ - vrc_pciregs[LO(NILE4_PCIERR)] = 0; - vrc_pciregs[HI(NILE4_PCIERR)] = 0; - - /* Setup address */ - if (busnum == 0) - adr = - KSEG1ADDR(PCI_WINDOW1) + - ((1 << (PCI_SLOT(devfn) + 15)) | (PCI_FUNC(devfn) << 8) - | (where & ~3)); - else - adr = KSEG1ADDR(PCI_WINDOW1) | (busnum << 16) | (devfn << 8) | - (where & ~3); - - if (access_type == PCI_ACCESS_WRITE) - *(u32 *) adr = *val; - else - *val = *(u32 *) adr; - - /* Check for master or target abort */ - err = (vrc_pciregs[HI(NILE4_PCICTRL)] >> 5) & 0x7; - - /* Restore PCI Window 1 */ - vrc_pciregs[LO(NILE4_PCIINIT1)] = mask; - - if (err) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -static int nile4_pcibios_read(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 *val) -{ - u32 data = 0; - int err; - - if ((size == 2) && (where & 1)) - return PCIBIOS_BAD_REGISTER_NUMBER; - else if ((size == 4) && (where & 3)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - err = nile4_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, where, - &data); - if (err) - return err; - - if (size == 1) - *val = (data >> ((where & 3) << 3)) & 0xff; - else if (size == 2) - *val = (data >> ((where & 3) << 3)) & 0xffff; - else - *val = data; - - return PCIBIOS_SUCCESSFUL; -} - -static int nile4_pcibios_write(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 val) -{ - u32 data = 0; - int err; - - if ((size == 2) && (where & 1)) - return PCIBIOS_BAD_REGISTER_NUMBER; - else if ((size == 4) && (where & 3)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - err = nile4_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, where, - &data); - if (err) - return err; - - if (size == 1) - data = (data & ~(0xff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - else if (size == 2) - data = (data & ~(0xffff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - else - data = val; - - if (nile4_pcibios_config_access - (PCI_ACCESS_WRITE, bus, devfn, where, &data)) - return -1; - - return PCIBIOS_SUCCESSFUL; -} - -struct pci_ops nile4_pci_ops = { - .read = nile4_pcibios_read, - .write = nile4_pcibios_write, -}; diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c deleted file mode 100644 index ad5dd711c575..000000000000 --- a/arch/mips/pci/ops-pmcmsp.c +++ /dev/null @@ -1,944 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * PMC-Sierra MSP board specific pci_ops - * - * Copyright 2001 MontaVista Software Inc. 
- * Copyright 2005-2007 PMC-Sierra, Inc - * - * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net - * - * Much of the code is derived from the original DDB5074 port by - * Geert Uytterhoeven <geert@linux-m68k.org> - */ - -#define PCI_COUNTERS 1 - -#include <linux/types.h> -#include <linux/pci.h> -#include <linux/interrupt.h> - -#if defined(CONFIG_PROC_FS) && defined(PCI_COUNTERS) -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#endif /* CONFIG_PROC_FS && PCI_COUNTERS */ - -#include <linux/kernel.h> -#include <linux/init.h> - -#include <asm/byteorder.h> -#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL) -#include <asm/mipsmtregs.h> -#endif - -#include <msp_prom.h> -#include <msp_cic_int.h> -#include <msp_pci.h> -#include <msp_regs.h> -#include <msp_regops.h> - -#define PCI_ACCESS_READ 0 -#define PCI_ACCESS_WRITE 1 - -#if defined(CONFIG_PROC_FS) && defined(PCI_COUNTERS) -static char proc_init; -extern struct proc_dir_entry *proc_bus_pci_dir; -unsigned int pci_int_count[32]; - -static void pci_proc_init(void); - -/***************************************************************************** - * - * FUNCTION: show_msp_pci_counts - * _________________________________________________________________________ - * - * DESCRIPTION: Prints the count of how many times each PCI - * interrupt has asserted. Can be invoked by the - * /proc filesystem. - * - * INPUTS: m - synthetic file construction data - * v - iterator - * - * RETURNS: 0 or error - * - ****************************************************************************/ -static int show_msp_pci_counts(struct seq_file *m, void *v) -{ - int i; - unsigned int intcount, total = 0; - - for (i = 0; i < 32; ++i) { - intcount = pci_int_count[i]; - if (intcount != 0) { - seq_printf(m, "[%d] = %u\n", i, intcount); - total += intcount; - } - } - - seq_printf(m, "total = %u\n", total); - return 0; -} - -/***************************************************************************** - * - * FUNCTION: gen_pci_cfg_wr_show - * _________________________________________________________________________ - * - * DESCRIPTION: Generates a configuration write cycle for debug purposes. - * The IDSEL line asserted and location and data written are - * immaterial. Just want to be able to prove that a - * configuration write can be correctly generated on the - * PCI bus. Intent is that this function by invocable from - * the /proc filesystem. 
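The deleted ops-pmcmsp.c kept per-bit interrupt counters and published them through /proc with the single-show seq_file helpers (show_msp_pci_counts() above, hooked up by proc_create_single() in the pci_proc_init() further down). That pattern is still the usual way to expose a small read-only debug dump; a self-contained sketch, with the entry name and counter array purely illustrative:

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static unsigned int demo_counts[32];	/* illustrative data source */

static int demo_counts_show(struct seq_file *m, void *v)
{
	unsigned int i, total = 0;

	for (i = 0; i < ARRAY_SIZE(demo_counts); i++) {
		if (demo_counts[i]) {
			seq_printf(m, "[%u] = %u\n", i, demo_counts[i]);
			total += demo_counts[i];
		}
	}
	seq_printf(m, "total = %u\n", total);
	return 0;
}

static int __init demo_proc_init(void)
{
	/* One call wires the show routine to a read-only /proc entry. */
	if (!proc_create_single("demo_counts", 0, NULL, demo_counts_show))
		return -ENOMEM;
	return 0;
}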
- * - * INPUTS: m - synthetic file construction data - * v - iterator - * - * RETURNS: 0 or error - * - ****************************************************************************/ -static int gen_pci_cfg_wr_show(struct seq_file *m, void *v) -{ - unsigned char where = 0; /* Write to static Device/Vendor ID */ - unsigned char bus_num = 0; /* Bus 0 */ - unsigned char dev_fn = 0xF; /* Arbitrary device number */ - u32 wr_data = 0xFF00AA00; /* Arbitrary data */ - struct msp_pci_regs *preg = (void *)PCI_BASE_REG; - unsigned long value; - int intr; - - seq_puts(m, "PMC MSP PCI: Beginning\n"); - - if (proc_init == 0) { - pci_proc_init(); - proc_init = ~0; - } - - seq_puts(m, "PMC MSP PCI: Before Cfg Wr\n"); - - /* - * Generate PCI Configuration Write Cycle - */ - - /* Clear cause register bits */ - preg->if_status = ~(BPCI_IFSTATUS_BC0F | BPCI_IFSTATUS_BC1F); - - /* Setup address that is to appear on PCI bus */ - preg->config_addr = BPCI_CFGADDR_ENABLE | - (bus_num << BPCI_CFGADDR_BUSNUM_SHF) | - (dev_fn << BPCI_CFGADDR_FUNCTNUM_SHF) | - (where & 0xFC); - - value = cpu_to_le32(wr_data); - - /* Launch the PCI configuration write cycle */ - *PCI_CONFIG_SPACE_REG = value; - - /* - * Check if the PCI configuration cycle (rd or wr) succeeded, by - * checking the status bits for errors like master or target abort. - */ - intr = preg->if_status; - - seq_puts(m, "PMC MSP PCI: After Cfg Wr\n"); - return 0; -} - -/***************************************************************************** - * - * FUNCTION: pci_proc_init - * _________________________________________________________________________ - * - * DESCRIPTION: Create entries in the /proc filesystem for debug access. - * - * INPUTS: none - * - * OUTPUTS: none - * - * RETURNS: none - * - ****************************************************************************/ -static void pci_proc_init(void) -{ - proc_create_single("pmc_msp_pci_rd_cnt", 0, NULL, show_msp_pci_counts); - proc_create_single("pmc_msp_pci_cfg_wr", 0, NULL, gen_pci_cfg_wr_show); -} -#endif /* CONFIG_PROC_FS && PCI_COUNTERS */ - -/***************************************************************************** - * - * STRUCT: pci_io_resource - * _________________________________________________________________________ - * - * DESCRIPTION: Defines the address range that pciauto() will use to - * assign to the I/O BARs of PCI devices. - * - * Use the start and end addresses of the MSP7120 PCI Host - * Controller I/O space, in the form that they appear on the - * PCI bus AFTER MSP7120 has performed address translation. - * - * For I/O accesses, MSP7120 ignores OATRAN and maps I/O - * accesses into the bottom 0xFFF region of address space, - * so that is the range to put into the pci_io_resource - * struct. - * - * In MSP4200, the start address was 0x04 instead of the - * expected 0x00. Will just assume there was a good reason - * for this! - * - * NOTES: Linux, by default, will assign I/O space to the lowest - * region of address space. Since MSP7120 and Linux, - * by default, have no offset in between how they map, the - * io_offset element of pci_controller struct should be set - * to zero. - * ELEMENTS: - * name - String used for a meaningful name. - * - * start - Start address of MSP7120's I/O space, as MSP7120 presents - * the address on the PCI bus. - * - * end - End address of MSP7120's I/O space, as MSP7120 presents - * the address on the PCI bus. - * - * flags - Attributes indicating the type of resource. In this case, - * indicate I/O space. 
- * - ****************************************************************************/ -static struct resource pci_io_resource = { - .name = "pci IO space", - .start = 0x04, - .end = 0x0FFF, - .flags = IORESOURCE_IO /* I/O space */ -}; - -/***************************************************************************** - * - * STRUCT: pci_mem_resource - * _________________________________________________________________________ - * - * DESCRIPTION: Defines the address range that pciauto() will use to - * assign to the memory BARs of PCI devices. - * - * The .start and .end values are dependent upon how address - * translation is performed by the OATRAN regiser. - * - * The values to use for .start and .end are the values - * in the form they appear on the PCI bus AFTER MSP7120 has - * performed OATRAN address translation. - * - * ELEMENTS: - * name - String used for a meaningful name. - * - * start - Start address of MSP7120's memory space, as MSP7120 presents - * the address on the PCI bus. - * - * end - End address of MSP7120's memory space, as MSP7120 presents - * the address on the PCI bus. - * - * flags - Attributes indicating the type of resource. In this case, - * indicate memory space. - * - ****************************************************************************/ -static struct resource pci_mem_resource = { - .name = "pci memory space", - .start = MSP_PCI_SPACE_BASE, - .end = MSP_PCI_SPACE_END, - .flags = IORESOURCE_MEM /* memory space */ -}; - -/***************************************************************************** - * - * FUNCTION: bpci_interrupt - * _________________________________________________________________________ - * - * DESCRIPTION: PCI status interrupt handler. Updates the count of how - * many times each status bit has been set, then clears - * the status bits. If the appropriate macros are defined, - * these counts can be viewed via the /proc filesystem. - * - * INPUTS: irq - unused - * dev_id - unused - * pt_regs - unused - * - * OUTPUTS: none - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * - ****************************************************************************/ -static irqreturn_t bpci_interrupt(int irq, void *dev_id) -{ - struct msp_pci_regs *preg = (void *)PCI_BASE_REG; - unsigned int stat = preg->if_status; - -#if defined(CONFIG_PROC_FS) && defined(PCI_COUNTERS) - int i; - for (i = 0; i < 32; ++i) { - if ((1 << i) & stat) - ++pci_int_count[i]; - } -#endif /* PROC_FS && PCI_COUNTERS */ - - /* printk("PCI ISR: Status=%08X\n", stat); */ - - /* write to clear all asserted interrupts */ - preg->if_status = stat; - - return IRQ_HANDLED; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_config_access - * _________________________________________________________________________ - * - * DESCRIPTION: Performs a PCI configuration access (rd or wr), then - * checks that the access succeeded by querying MSP7120's - * PCI status bits. - * - * INPUTS: - * access_type - kind of PCI configuration cycle to perform - * (read or write). Legal values are - * PCI_ACCESS_WRITE and PCI_ACCESS_READ. - * - * bus - pointer to the bus number of the device to - * be targeted for the configuration cycle. - * The only element of the pci_bus structure - * used is bus->number. This argument determines - * if the configuration access will be Type 0 or - * Type 1. Since MSP7120 assumes itself to be the - * PCI Host, any non-zero bus->number generates - * a Type 1 access. - * - * devfn - this is an 8-bit field. 
The lower three bits - * specify the function number of the device to - * be targeted for the configuration cycle, with - * all three-bit combinations being legal. The - * upper five bits specify the device number, - * with legal values being 10 to 31. - * - * where - address within the Configuration Header - * space to access. - * - * data - for write accesses, contains the data to - * write. - * - * OUTPUTS: - * data - for read accesses, contains the value read. - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * -1 - access failure - * - ****************************************************************************/ -int msp_pcibios_config_access(unsigned char access_type, - struct pci_bus *bus, - unsigned int devfn, - unsigned char where, - u32 *data) -{ - struct msp_pci_regs *preg = (void *)PCI_BASE_REG; - unsigned char bus_num = bus->number; - unsigned char dev_fn = (unsigned char)devfn; - unsigned long intr; - unsigned long value; - static char pciirqflag; - int ret; -#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL) - unsigned int vpe_status; -#endif - -#if defined(CONFIG_PROC_FS) && defined(PCI_COUNTERS) - if (proc_init == 0) { - pci_proc_init(); - proc_init = ~0; - } -#endif /* CONFIG_PROC_FS && PCI_COUNTERS */ - - /* - * Just the first time this function invokes, allocate - * an interrupt line for PCI host status interrupts. The - * allocation assigns an interrupt handler to the interrupt. - */ - if (pciirqflag == 0) { - ret = request_irq(MSP_INT_PCI,/* Hardcoded internal MSP7120 wiring */ - bpci_interrupt, - IRQF_SHARED, - "PMC MSP PCI Host", - preg); - if (ret != 0) - return ret; - pciirqflag = ~0; - } - -#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL) - vpe_status = dvpe(); -#endif - - /* - * Clear PCI cause register bits. - * - * In Polo, the PCI Host had a dedicated DMA called the - * Block Copy (not to be confused with the general purpose Block - * Copy Engine block). There appear to have been special interrupts - * for this Block Copy, called Block Copy 0 Fault (BC0F) and - * Block Copy 1 Fault (BC1F). MSP4200 and MSP7120 don't have this - * dedicated Block Copy block, so these two interrupts are now - * marked reserved. In case the Block Copy is resurrected in a - * future design, maintain the code that treats these two interrupts - * specially. - * - * Write to clear all interrupts in the PCI status register, aside - * from BC0F and BC1F. - */ - preg->if_status = ~(BPCI_IFSTATUS_BC0F | BPCI_IFSTATUS_BC1F); - - /* Setup address that is to appear on PCI bus */ - preg->config_addr = BPCI_CFGADDR_ENABLE | - (bus_num << BPCI_CFGADDR_BUSNUM_SHF) | - (dev_fn << BPCI_CFGADDR_FUNCTNUM_SHF) | - (where & 0xFC); - - /* IF access is a PCI configuration write */ - if (access_type == PCI_ACCESS_WRITE) { - value = cpu_to_le32(*data); - *PCI_CONFIG_SPACE_REG = value; - } else { - /* ELSE access is a PCI configuration read */ - value = le32_to_cpu(*PCI_CONFIG_SPACE_REG); - *data = value; - } - - /* - * Check if the PCI configuration cycle (rd or wr) succeeded, by - * checking the status bits for errors like master or target abort. 
- */ - intr = preg->if_status; - - /* Clear config access */ - preg->config_addr = 0; - - /* IF error occurred */ - if (intr & ~(BPCI_IFSTATUS_BC0F | BPCI_IFSTATUS_BC1F)) { - /* Clear status bits */ - preg->if_status = ~(BPCI_IFSTATUS_BC0F | BPCI_IFSTATUS_BC1F); - -#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL) - evpe(vpe_status); -#endif - - return -1; - } - -#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL) - evpe(vpe_status); -#endif - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_read_config_byte - * _________________________________________________________________________ - * - * DESCRIPTION: Read a byte from PCI configuration address spac - * Since the hardware can't address 8 bit chunks - * directly, read a 32-bit chunk, then mask off extraneous - * bits. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the read is destined for. - * devfn - device/function combination that the read is - * destined for. - * where - register within the Configuration Header space - * to access. - * - * OUTPUTS val - read data - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * -1 - read access failure - * - ****************************************************************************/ -static int -msp_pcibios_read_config_byte(struct pci_bus *bus, - unsigned int devfn, - int where, - u32 *val) -{ - u32 data = 0; - - /* - * If the config access did not complete normally (e.g., underwent - * master abort) do the PCI compliant thing, which is to supply an - * all ones value. - */ - if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, - where, &data)) { - *val = 0xFFFFFFFF; - return -1; - } - - *val = (data >> ((where & 3) << 3)) & 0x0ff; - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_read_config_word - * _________________________________________________________________________ - * - * DESCRIPTION: Read a word (16 bits) from PCI configuration address space. - * Since the hardware can't address 16 bit chunks - * directly, read a 32-bit chunk, then mask off extraneous - * bits. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the read is destined for. - * devfn - device/function combination that the read is - * destined for. - * where - register within the Configuration Header space - * to access. - * - * OUTPUTS val - read data - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * PCIBIOS_BAD_REGISTER_NUMBER - bad register address - * -1 - read access failure - * - ****************************************************************************/ -static int -msp_pcibios_read_config_word(struct pci_bus *bus, - unsigned int devfn, - int where, - u32 *val) -{ - u32 data = 0; - - /* if (where & 1) */ /* Commented out non-compliant code. - * Should allow word access to configuration - * registers, with only exception being when - * the word access would wrap around into - * the next dword. - */ - if ((where & 3) == 3) { - *val = 0xFFFFFFFF; - return PCIBIOS_BAD_REGISTER_NUMBER; - } - - /* - * If the config access did not complete normally (e.g., underwent - * master abort) do the PCI compliant thing, which is to supply an - * all ones value. 
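All of the host-controller code removed in this series handles sub-dword configuration accesses the same way: the hardware only runs naturally aligned 32-bit cycles, so a byte or halfword read extracts the right lanes from the containing dword and a narrow write is a read-modify-write of it, with the lane shift computed as (where & 3) << 3; that is exactly what msp_pcibios_read_config_byte() above and the write helpers further down do. A standalone sketch of the arithmetic:

#include <linux/types.h>

/* Pull a 1-, 2- or 4-byte field at config offset 'where' out of the
 * aligned 32-bit word that contains it. */
static u32 cfg_extract(u32 dword, unsigned int where, int size)
{
	unsigned int shift = (where & 3) << 3;

	if (size == 1)
		return (dword >> shift) & 0xff;
	if (size == 2)
		return (dword >> shift) & 0xffff;
	return dword;
}

/* Merge a narrow write value back into the aligned dword (read-modify-write). */
static u32 cfg_merge(u32 dword, unsigned int where, int size, u32 val)
{
	unsigned int shift = (where & 3) << 3;
	u32 mask = (size == 1) ? 0xff : (size == 2) ? 0xffff : 0xffffffff;

	return (dword & ~(mask << shift)) | ((val & mask) << shift);
}

For instance, a byte write to offset 0x3e only changes bits 23:16 of the dword read back from offset 0x3c.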
- */ - if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, - where, &data)) { - *val = 0xFFFFFFFF; - return -1; - } - - *val = (data >> ((where & 3) << 3)) & 0x0ffff; - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_read_config_dword - * _________________________________________________________________________ - * - * DESCRIPTION: Read a double word (32 bits) from PCI configuration - * address space. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the read is destined for. - * devfn - device/function combination that the read is - * destined for. - * where - register within the Configuration Header space - * to access. - * - * OUTPUTS val - read data - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * PCIBIOS_BAD_REGISTER_NUMBER - bad register address - * -1 - read access failure - * - ****************************************************************************/ -static int -msp_pcibios_read_config_dword(struct pci_bus *bus, - unsigned int devfn, - int where, - u32 *val) -{ - u32 data = 0; - - /* Address must be dword aligned. */ - if (where & 3) { - *val = 0xFFFFFFFF; - return PCIBIOS_BAD_REGISTER_NUMBER; - } - - /* - * If the config access did not complete normally (e.g., underwent - * master abort) do the PCI compliant thing, which is to supply an - * all ones value. - */ - if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, - where, &data)) { - *val = 0xFFFFFFFF; - return -1; - } - - *val = data; - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_write_config_byte - * _________________________________________________________________________ - * - * DESCRIPTION: Write a byte to PCI configuration address space. - * Since the hardware can't address 8 bit chunks - * directly, a read-modify-write is performed. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the write is destined for. - * devfn - device/function combination that the write is - * destined for. - * where - register within the Configuration Header space - * to access. - * val - value to write - * - * OUTPUTS none - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * -1 - write access failure - * - ****************************************************************************/ -static int -msp_pcibios_write_config_byte(struct pci_bus *bus, - unsigned int devfn, - int where, - u8 val) -{ - u32 data = 0; - - /* read config space */ - if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, - where, &data)) - return -1; - - /* modify the byte within the dword */ - data = (data & ~(0xff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - - /* write back the full dword */ - if (msp_pcibios_config_access(PCI_ACCESS_WRITE, bus, devfn, - where, &data)) - return -1; - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_write_config_word - * _________________________________________________________________________ - * - * DESCRIPTION: Write a word (16-bits) to PCI configuration address space. - * Since the hardware can't address 16 bit chunks - * directly, a read-modify-write is performed. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the write is destined for. - * devfn - device/function combination that the write is - * destined for. 
- * where - register within the Configuration Header space - * to access. - * val - value to write - * - * OUTPUTS none - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * PCIBIOS_BAD_REGISTER_NUMBER - bad register address - * -1 - write access failure - * - ****************************************************************************/ -static int -msp_pcibios_write_config_word(struct pci_bus *bus, - unsigned int devfn, - int where, - u16 val) -{ - u32 data = 0; - - /* Fixed non-compliance: if (where & 1) */ - if ((where & 3) == 3) - return PCIBIOS_BAD_REGISTER_NUMBER; - - /* read config space */ - if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, - where, &data)) - return -1; - - /* modify the word within the dword */ - data = (data & ~(0xffff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - - /* write back the full dword */ - if (msp_pcibios_config_access(PCI_ACCESS_WRITE, bus, devfn, - where, &data)) - return -1; - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_write_config_dword - * _________________________________________________________________________ - * - * DESCRIPTION: Write a double word (32-bits) to PCI configuration address - * space. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the write is destined for. - * devfn - device/function combination that the write is - * destined for. - * where - register within the Configuration Header space - * to access. - * val - value to write - * - * OUTPUTS none - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * PCIBIOS_BAD_REGISTER_NUMBER - bad register address - * -1 - write access failure - * - ****************************************************************************/ -static int -msp_pcibios_write_config_dword(struct pci_bus *bus, - unsigned int devfn, - int where, - u32 val) -{ - /* check that address is dword aligned */ - if (where & 3) - return PCIBIOS_BAD_REGISTER_NUMBER; - - /* perform write */ - if (msp_pcibios_config_access(PCI_ACCESS_WRITE, bus, devfn, - where, &val)) - return -1; - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_read_config - * _________________________________________________________________________ - * - * DESCRIPTION: Interface the PCI configuration read request with - * the appropriate function, based on how many bytes - * the read request is. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the write is destined for. - * devfn - device/function combination that the write is - * destined for. - * where - register within the Configuration Header space - * to access. - * size - in units of bytes, should be 1, 2, or 4. - * - * OUTPUTS val - value read, with any extraneous bytes masked - * to zero. 
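The write paths take the mirror-image route: because the controller only moves whole dwords, a byte or word store becomes read, splice, write back. A self-contained sketch of the splice step (invented values, no hardware access):

    #include <stdint.h>
    #include <stdio.h>

    /* Splice an 8-bit value into the lane of a 32-bit dword selected by 'where',
     * the read-modify-write step performed by the removed write_config_byte(). */
    static uint32_t splice_byte(uint32_t dword, int where, uint8_t val)
    {
    	int shift = (where & 3) << 3;

    	return (dword & ~(0xffu << shift)) | ((uint32_t)val << shift);
    }

    int main(void)
    {
    	uint32_t dword = 0x11223344;	/* pretend this was just read back */

    	/* overwrite the byte at offset 2: 0x11223344 becomes 0x11aa3344 */
    	printf("0x%08x\n", (unsigned int)splice_byte(dword, 2, 0xaa));
    	return 0;
    }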
- * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * -1 - failure - * - ****************************************************************************/ -int -msp_pcibios_read_config(struct pci_bus *bus, - unsigned int devfn, - int where, - int size, - u32 *val) -{ - if (size == 1) { - if (msp_pcibios_read_config_byte(bus, devfn, where, val)) { - return -1; - } - } else if (size == 2) { - if (msp_pcibios_read_config_word(bus, devfn, where, val)) { - return -1; - } - } else if (size == 4) { - if (msp_pcibios_read_config_dword(bus, devfn, where, val)) { - return -1; - } - } else { - *val = 0xFFFFFFFF; - return -1; - } - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_write_config - * _________________________________________________________________________ - * - * DESCRIPTION: Interface the PCI configuration write request with - * the appropriate function, based on how many bytes - * the read request is. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the write is destined for. - * devfn - device/function combination that the write is - * destined for. - * where - register within the Configuration Header space - * to access. - * size - in units of bytes, should be 1, 2, or 4. - * val - value to write - * - * OUTPUTS: none - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * -1 - failure - * - ****************************************************************************/ -int -msp_pcibios_write_config(struct pci_bus *bus, - unsigned int devfn, - int where, - int size, - u32 val) -{ - if (size == 1) { - if (msp_pcibios_write_config_byte(bus, devfn, - where, (u8)(0xFF & val))) { - return -1; - } - } else if (size == 2) { - if (msp_pcibios_write_config_word(bus, devfn, - where, (u16)(0xFFFF & val))) { - return -1; - } - } else if (size == 4) { - if (msp_pcibios_write_config_dword(bus, devfn, where, val)) { - return -1; - } - } else { - return -1; - } - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * STRUCTURE: msp_pci_ops - * _________________________________________________________________________ - * - * DESCRIPTION: structure to abstract the hardware specific PCI - * configuration accesses. - * - * ELEMENTS: - * read - function for Linux to generate PCI Configuration reads. - * write - function for Linux to generate PCI Configuration writes. - * - ****************************************************************************/ -struct pci_ops msp_pci_ops = { - .read = msp_pcibios_read_config, - .write = msp_pcibios_write_config -}; - -/***************************************************************************** - * - * STRUCTURE: msp_pci_controller - * _________________________________________________________________________ - * - * Describes the attributes of the MSP7120 PCI Host Controller - * - * ELEMENTS: - * pci_ops - abstracts the hardware specific PCI configuration - * accesses. - * - * mem_resource - address range pciauto() uses to assign to PCI device - * memory BARs. - * - * mem_offset - offset between how MSP7120 outbound PCI memory - * transaction addresses appear on the PCI bus and how Linux - * wants to configure memory BARs of the PCI devices. - * MSP7120 does nothing funky, so just set to zero. - * - * io_resource - address range pciauto() uses to assign to PCI device - * I/O BARs. 
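The two wrappers above are what Linux actually calls through struct pci_ops; they only pick an accessor by transfer size. A toy stand-in for that indirection (the type and function names below are invented; the real ones live in the kernel's PCI headers):

    #include <stdint.h>
    #include <stdio.h>

    /* Invented stand-in for the pci_ops indirection: the generic layer sees a
     * function pointer plus an access size, nothing hardware specific. */
    struct fake_pci_ops {
    	int (*read)(int where, int size, uint32_t *val);
    };

    static int fake_read(int where, int size, uint32_t *val)
    {
    	uint32_t dword = 0x11223344;	/* pretend config-space dword */
    	uint32_t mask = (size == 4) ? 0xffffffffu : ((1u << (size * 8)) - 1);

    	*val = (dword >> ((where & 3) * 8)) & mask;
    	return 0;
    }

    static struct fake_pci_ops ops = { .read = fake_read };

    int main(void)
    {
    	uint32_t val;

    	ops.read(2, 2, &val);	/* 16-bit read at offset 2 -> 0x1122 */
    	printf("0x%04x\n", (unsigned int)val);
    	return 0;
    }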
- * - * io_offset - offset between how MSP7120 outbound PCI I/O - * transaction addresses appear on the PCI bus and how - * Linux defaults to configure I/O BARs of the PCI devices. - * MSP7120 maps outbound I/O accesses into the bottom - * bottom 4K of PCI address space (and ignores OATRAN). - * Since the Linux default is to configure I/O BARs to the - * bottom 4K, no special offset is needed. Just set to zero. - * - ****************************************************************************/ -static struct pci_controller msp_pci_controller = { - .pci_ops = &msp_pci_ops, - .mem_resource = &pci_mem_resource, - .mem_offset = 0, - .io_map_base = MSP_PCI_IOSPACE_BASE, - .io_resource = &pci_io_resource, - .io_offset = 0 -}; - -/***************************************************************************** - * - * FUNCTION: msp_pci_init - * _________________________________________________________________________ - * - * DESCRIPTION: Initialize the PCI Host Controller and register it with - * Linux so Linux can seize control of the PCI bus. - * - ****************************************************************************/ -void __init msp_pci_init(void) -{ - struct msp_pci_regs *preg = (void *)PCI_BASE_REG; - u32 id; - - /* Extract Device ID */ - id = read_reg32(PCI_JTAG_DEVID_REG, 0xFFFF) >> 12; - - /* Check if JTAG ID identifies MSP7120 */ - if (!MSP_HAS_PCI(id)) { - printk(KERN_WARNING "PCI: No PCI; id reads as %x\n", id); - goto no_pci; - } - - /* - * Enable flushing of the PCI-SDRAM queue upon a read - * of the SDRAM's Memory Configuration Register. - */ - *(unsigned long *)QFLUSH_REG_1 = 3; - - /* Configure PCI Host Controller. */ - preg->if_status = ~0; /* Clear cause register bits */ - preg->config_addr = 0; /* Clear config access */ - preg->oatran = MSP_PCI_OATRAN; /* PCI outbound addr translation */ - preg->if_mask = 0xF8BF87C0; /* Enable all PCI status interrupts */ - - /* configure so inb(), outb(), and family are functional */ - set_io_port_base(MSP_PCI_IOSPACE_BASE); - - /* Tell Linux the details of the MSP7120 PCI Host Controller */ - register_pci_controller(&msp_pci_controller); - - return; - -no_pci: - /* Disable PCI channel */ - printk(KERN_WARNING "PCI: no host PCI bus detected\n"); -} diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c index 01a2af8215c8..7285b5667568 100644 --- a/arch/mips/pci/pci-alchemy.c +++ b/arch/mips/pci/pci-alchemy.c @@ -52,7 +52,7 @@ struct alchemy_pci_context { static struct alchemy_pci_context *__alchemy_pci_ctx; -/* IO/MEM resources for PCI. Keep the memres in sync with __fixup_bigphys_addr +/* IO/MEM resources for PCI. Keep the memres in sync with fixup_bigphys_addr * in arch/mips/alchemy/common/setup.c */ static struct resource alchemy_pci_def_memres = { diff --git a/arch/mips/pci/pci-emma2rh.c b/arch/mips/pci/pci-emma2rh.c deleted file mode 100644 index 156091a3e341..000000000000 --- a/arch/mips/pci/pci-emma2rh.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - * - * This file is based on the arch/mips/ddb5xxx/ddb5477/pci.c - * - * Copyright 2001 MontaVista Software Inc. 
- */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/pci.h> - -#include <asm/bootinfo.h> - -#include <asm/emma/emma2rh.h> - -static struct resource pci_io_resource = { - .name = "pci IO space", - .start = EMMA2RH_PCI_IO_BASE, - .end = EMMA2RH_PCI_IO_BASE + EMMA2RH_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO, -}; - -static struct resource pci_mem_resource = { - .name = "pci memory space", - .start = EMMA2RH_PCI_MEM_BASE, - .end = EMMA2RH_PCI_MEM_BASE + EMMA2RH_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM, -}; - -extern struct pci_ops emma2rh_pci_ops; - -static struct pci_controller emma2rh_pci_controller = { - .pci_ops = &emma2rh_pci_ops, - .mem_resource = &pci_mem_resource, - .io_resource = &pci_io_resource, - .mem_offset = -0x04000000, - .io_offset = 0, -}; - -static void __init emma2rh_pci_init(void) -{ - /* setup PCI interface */ - emma2rh_out32(EMMA2RH_PCI_ARBIT_CTR, 0x70f); - - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, 0x80000a18); - emma2rh_out32(EMMA2RH_PCI_CONFIG_BASE + PCI_COMMAND, - PCI_STATUS_DEVSEL_MEDIUM | PCI_STATUS_CAP_LIST | - PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY); - emma2rh_out32(EMMA2RH_PCI_CONFIG_BASE + PCI_BASE_ADDRESS_0, 0x10000000); - emma2rh_out32(EMMA2RH_PCI_CONFIG_BASE + PCI_BASE_ADDRESS_1, 0x00000000); - - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, 0x12000000 | 0x218); - emma2rh_out32(EMMA2RH_PCI_IWIN1_CTR, 0x18000000 | 0x600); - emma2rh_out32(EMMA2RH_PCI_INIT_ESWP, 0x00000200); - - emma2rh_out32(EMMA2RH_PCI_TWIN_CTR, 0x00009200); - emma2rh_out32(EMMA2RH_PCI_TWIN_BADR, 0x00000000); - emma2rh_out32(EMMA2RH_PCI_TWIN0_DADR, 0x00000000); - emma2rh_out32(EMMA2RH_PCI_TWIN1_DADR, 0x00000000); -} - -static int __init emma2rh_pci_setup(void) -{ - emma2rh_pci_init(); - register_pci_controller(&emma2rh_pci_controller); - return 0; -} - -arch_initcall(emma2rh_pci_setup); diff --git a/arch/mips/pci/pci-lasat.c b/arch/mips/pci/pci-lasat.c deleted file mode 100644 index 47f4ee6bbb3b..000000000000 --- a/arch/mips/pci/pci-lasat.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
- * - * Copyright (C) 2000, 2001, 04 Keith M Wesolowski - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/types.h> - -#include <asm/lasat/lasat.h> - -#include <irq.h> - -extern struct pci_ops nile4_pci_ops; -extern struct pci_ops gt64xxx_pci0_ops; -static struct resource lasat_pci_mem_resource = { - .name = "LASAT PCI MEM", - .start = 0x18000000, - .end = 0x19ffffff, - .flags = IORESOURCE_MEM, -}; - -static struct resource lasat_pci_io_resource = { - .name = "LASAT PCI IO", - .start = 0x1a000000, - .end = 0x1bffffff, - .flags = IORESOURCE_IO, -}; - -static struct pci_controller lasat_pci_controller = { - .mem_resource = &lasat_pci_mem_resource, - .io_resource = &lasat_pci_io_resource, -}; - -static int __init lasat_pci_setup(void) -{ - printk(KERN_DEBUG "PCI: starting\n"); - - if (IS_LASAT_200()) - lasat_pci_controller.pci_ops = &nile4_pci_ops; - else - lasat_pci_controller.pci_ops = >64xxx_pci0_ops; - - register_pci_controller(&lasat_pci_controller); - - return 0; -} - -arch_initcall(lasat_pci_setup); - -#define LASAT_IRQ_ETH1 (LASAT_IRQ_BASE + 0) -#define LASAT_IRQ_ETH0 (LASAT_IRQ_BASE + 1) -#define LASAT_IRQ_HDC (LASAT_IRQ_BASE + 2) -#define LASAT_IRQ_COMP (LASAT_IRQ_BASE + 3) -#define LASAT_IRQ_HDLC (LASAT_IRQ_BASE + 4) -#define LASAT_IRQ_PCIA (LASAT_IRQ_BASE + 5) -#define LASAT_IRQ_PCIB (LASAT_IRQ_BASE + 6) -#define LASAT_IRQ_PCIC (LASAT_IRQ_BASE + 7) -#define LASAT_IRQ_PCID (LASAT_IRQ_BASE + 8) - -int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - switch (slot) { - case 1: - case 2: - case 3: - return LASAT_IRQ_PCIA + (((slot-1) + (pin-1)) % 4); - case 4: - return LASAT_IRQ_ETH1; /* Ethernet 1 (LAN 2) */ - case 5: - return LASAT_IRQ_ETH0; /* Ethernet 0 (LAN 1) */ - case 6: - return LASAT_IRQ_HDC; /* IDE controller */ - default: - return 0xff; /* Illegal */ - } - - return -1; -} - -/* Do platform specific device initialization at pci_enable_device() time */ -int pcibios_plat_dev_init(struct pci_dev *dev) -{ - return 0; -} diff --git a/arch/mips/pic32/Platform b/arch/mips/pic32/Platform index cd2084f44507..1e92e52a137b 100644 --- a/arch/mips/pic32/Platform +++ b/arch/mips/pic32/Platform @@ -1,7 +1,6 @@ # # PIC32MZDA # -platform-$(CONFIG_PIC32MZDA) += pic32/ cflags-$(CONFIG_PIC32MZDA) += -I$(srctree)/arch/mips/include/asm/mach-pic32 load-$(CONFIG_PIC32MZDA) += 0xffffffff88000000 all-$(CONFIG_PIC32MZDA) := $(COMPRESSION_FNAME).bin diff --git a/arch/mips/pistachio/Platform b/arch/mips/pistachio/Platform index c3592b374ad2..f73a1a929965 100644 --- a/arch/mips/pistachio/Platform +++ b/arch/mips/pistachio/Platform @@ -1,7 +1,6 @@ # # IMG Pistachio SoC # -platform-$(CONFIG_MACH_PISTACHIO) += pistachio/ cflags-$(CONFIG_MACH_PISTACHIO) += \ -I$(srctree)/arch/mips/include/asm/mach-pistachio load-$(CONFIG_MACH_PISTACHIO) += 0xffffffff80400000 diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c index a09a5da38e6b..558995ed6fe8 100644 --- a/arch/mips/pistachio/init.c +++ b/arch/mips/pistachio/init.c @@ -83,12 +83,12 @@ phys_addr_t mips_cdmm_phys_base(void) static void __init mips_nmi_setup(void) { void *base; - extern char except_vec_nmi; + extern char except_vec_nmi[]; base = cpu_has_veic ? 
(void *)(CAC_BASE + 0xa80) : (void *)(CAC_BASE + 0x380); - memcpy(base, &except_vec_nmi, 0x80); + memcpy(base, except_vec_nmi, 0x80); flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } @@ -96,12 +96,12 @@ static void __init mips_nmi_setup(void) static void __init mips_ejtag_setup(void) { void *base; - extern char except_vec_ejtag_debug; + extern char except_vec_ejtag_debug[]; base = cpu_has_veic ? (void *)(CAC_BASE + 0xa00) : (void *)(CAC_BASE + 0x300); - memcpy(base, &except_vec_ejtag_debug, 0x80); + memcpy(base, except_vec_ejtag_debug, 0x80); flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } diff --git a/arch/mips/pmcs-msp71xx/Kconfig b/arch/mips/pmcs-msp71xx/Kconfig deleted file mode 100644 index b185b7620c97..000000000000 --- a/arch/mips/pmcs-msp71xx/Kconfig +++ /dev/null @@ -1,50 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -choice - prompt "PMC-Sierra MSP SOC type" - depends on PMC_MSP - -config PMC_MSP4200_EVAL - bool "PMC-Sierra MSP4200 Eval Board" - select IRQ_MSP_SLP - select HAVE_PCI - select MIPS_L1_CACHE_SHIFT_4 - -config PMC_MSP4200_GW - bool "PMC-Sierra MSP4200 VoIP Gateway" - select IRQ_MSP_SLP - select HAVE_PCI - -config PMC_MSP7120_EVAL - bool "PMC-Sierra MSP7120 Eval Board" - select SYS_SUPPORTS_MULTITHREADING - select IRQ_MSP_CIC - select HAVE_PCI - -config PMC_MSP7120_GW - bool "PMC-Sierra MSP7120 Residential Gateway" - select SYS_SUPPORTS_MULTITHREADING - select IRQ_MSP_CIC - select HAVE_PCI - select MSP_HAS_USB - select MSP_ETH - -config PMC_MSP7120_FPGA - bool "PMC-Sierra MSP7120 FPGA" - select SYS_SUPPORTS_MULTITHREADING - select IRQ_MSP_CIC - select HAVE_PCI - -endchoice - -config MSP_HAS_USB - bool - depends on PMC_MSP - -config MSP_ETH - bool - select MSP_HAS_MAC - depends on PMC_MSP - -config MSP_HAS_MAC - bool - depends on PMC_MSP diff --git a/arch/mips/pmcs-msp71xx/Makefile b/arch/mips/pmcs-msp71xx/Makefile deleted file mode 100644 index c040bd6ed62d..000000000000 --- a/arch/mips/pmcs-msp71xx/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the PMC-Sierra MSP SOCs -# -obj-y += msp_prom.o msp_setup.o msp_irq.o \ - msp_time.o msp_serial.o msp_elb.o -obj-$(CONFIG_PMC_MSP7120_GW) += msp_hwbutton.o -obj-$(CONFIG_IRQ_MSP_SLP) += msp_irq_slp.o -obj-$(CONFIG_IRQ_MSP_CIC) += msp_irq_cic.o msp_irq_per.o -obj-$(CONFIG_PCI) += msp_pci.o -obj-$(CONFIG_MSP_HAS_MAC) += msp_eth.o -obj-$(CONFIG_MSP_HAS_USB) += msp_usb.o -obj-$(CONFIG_MIPS_MT_SMP) += msp_smp.o diff --git a/arch/mips/pmcs-msp71xx/Platform b/arch/mips/pmcs-msp71xx/Platform deleted file mode 100644 index 7af0734a5007..000000000000 --- a/arch/mips/pmcs-msp71xx/Platform +++ /dev/null @@ -1,7 +0,0 @@ -# -# PMC-Sierra MSP SOCs -# -platform-$(CONFIG_PMC_MSP) += pmcs-msp71xx/ -cflags-$(CONFIG_PMC_MSP) += -I$(srctree)/arch/mips/include/asm/mach-pmcs-msp71xx \ - -mno-branch-likely -load-$(CONFIG_PMC_MSP) += 0xffffffff80100000 diff --git a/arch/mips/pmcs-msp71xx/msp_elb.c b/arch/mips/pmcs-msp71xx/msp_elb.c deleted file mode 100644 index 3e9641007216..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_elb.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Sets up the proper Chip Select configuration registers. It is assumed that - * PMON sets up the ADDR and MASK registers properly. - * - * Copyright 2005-2006 PMC-Sierra, Inc. 
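The two pistachio init.c hunks above switch the exception-vector symbols from `extern char foo` to `extern char foo[]`, so the identifier decays to the vector's start address and the memcpy() no longer needs an '&'. A tiny illustration of that declaration style (the symbol here is a local fake; the kernel's real vectors come from assembly):

    #include <stdio.h>
    #include <string.h>

    /* Fake stand-in for a vector normally defined in assembly (except_vec_nmi). */
    char fake_vec_nmi[0x10] = { 'N', 'M', 'I', 0 };

    /* Incomplete-array declaration, matching the '+' lines above: the name
     * decays to the first byte's address, so no '&' is required below. */
    extern char fake_vec_nmi[];

    int main(void)
    {
    	char buf[0x10];

    	memcpy(buf, fake_vec_nmi, sizeof(buf));
    	printf("%s\n", buf);	/* prints "NMI" */
    	return 0;
    }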
- * Author: Marc St-Jean, Marc_St-Jean@pmc-sierra.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <msp_regs.h> - -static int __init msp_elb_setup(void) -{ -#if defined(CONFIG_PMC_MSP7120_GW) \ - || defined(CONFIG_PMC_MSP7120_EVAL) - /* - * Force all CNFG to be identical and equal to CS0, - * according to OPS doc - */ - *CS1_CNFG_REG = *CS2_CNFG_REG = *CS3_CNFG_REG = *CS0_CNFG_REG; -#endif - return 0; -} - -subsys_initcall(msp_elb_setup); diff --git a/arch/mips/pmcs-msp71xx/msp_eth.c b/arch/mips/pmcs-msp71xx/msp_eth.c deleted file mode 100644 index 15679b427f44..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_eth.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * The setup file for ethernet related hardware on PMC-Sierra MSP processors. - * - * Copyright 2010 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/ioport.h> -#include <linux/platform_device.h> -#include <linux/delay.h> -#include <msp_regs.h> -#include <msp_int.h> -#include <msp_gpio_macros.h> - - -#define MSP_ETHERNET_GPIO0 14 -#define MSP_ETHERNET_GPIO1 15 -#define MSP_ETHERNET_GPIO2 16 - -#define MSP_ETH_ID "pmc_mspeth" -#define MSP_ETH_SIZE 0xE0 -static struct resource msp_eth0_resources[] = { - [0] = { - .start = MSP_MAC0_BASE, - .end = MSP_MAC0_BASE + MSP_ETH_SIZE - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = MSP_INT_MAC0, - .end = MSP_INT_MAC0, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct resource msp_eth1_resources[] = { - [0] = { - .start = MSP_MAC1_BASE, - .end = MSP_MAC1_BASE + MSP_ETH_SIZE - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = MSP_INT_MAC1, - .end = MSP_INT_MAC1, - .flags = IORESOURCE_IRQ, - }, -}; - - - -static struct platform_device mspeth_device[] = { - [0] = { - .name = MSP_ETH_ID, - .id = 0, - .num_resources = ARRAY_SIZE(msp_eth0_resources), - .resource = msp_eth0_resources, - }, - [1] = { - .name = MSP_ETH_ID, - .id = 1, - .num_resources = ARRAY_SIZE(msp_eth1_resources), - .resource = msp_eth1_resources, - }, - -}; -#define msp_eth_devs mspeth_device - -int __init msp_eth_setup(void) -{ - int i, ret = 0; - - /* Configure the GPIO and take the ethernet PHY out of reset */ - msp_gpio_pin_mode(MSP_GPIO_OUTPUT, MSP_ETHERNET_GPIO0); - msp_gpio_pin_hi(MSP_ETHERNET_GPIO0); - - for (i = 0; i < ARRAY_SIZE(msp_eth_devs); i++) { - ret = platform_device_register(&msp_eth_devs[i]); - printk(KERN_INFO "device: %d, return value = %d\n", i, ret); - if (ret) { - platform_device_unregister(&msp_eth_devs[i]); - break; - } - } - - if (ret) - printk(KERN_WARNING "Could not initialize " - "MSPETH device structures.\n"); - - return ret; -} -subsys_initcall(msp_eth_setup); diff --git a/arch/mips/pmcs-msp71xx/msp_hwbutton.c b/arch/mips/pmcs-msp71xx/msp_hwbutton.c deleted file mode 100644 index bb57ed9ea2bd..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_hwbutton.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Sets up interrupt handlers for various hardware switches which are - * connected to interrupt lines. - * - * Copyright 2005-2207 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/interrupt.h> - -#include <msp_int.h> -#include <msp_regs.h> -#include <msp_regops.h> - -/* For hwbutton_interrupt->initial_state */ -#define HWBUTTON_HI 0x1 -#define HWBUTTON_LO 0x2 - -/* - * This struct describes a hardware button - */ -struct hwbutton_interrupt { - char *name; /* Name of button */ - int irq; /* Actual LINUX IRQ */ - int eirq; /* Extended IRQ number (0-7) */ - int initial_state; /* The "normal" state of the switch */ - void (*handle_hi)(void *); /* Handler: switch input has gone HI */ - void (*handle_lo)(void *); /* Handler: switch input has gone LO */ - void *data; /* Optional data to pass to handler */ -}; - -#ifdef CONFIG_PMC_MSP7120_GW -extern void msp_restart(char *); - -static void softreset_push(void *data) -{ - printk(KERN_WARNING "SOFTRESET switch was pushed\n"); - - /* - * In the future you could move this to the release handler, - * timing the difference between the 'push' and 'release', and only - * doing this ungraceful restart if the button has been down for - * a certain amount of time; otherwise doing a graceful restart. - */ - - msp_restart(NULL); -} - -static void softreset_release(void *data) -{ - printk(KERN_WARNING "SOFTRESET switch was released\n"); - - /* Do nothing */ -} - -static void standby_on(void *data) -{ - printk(KERN_WARNING "STANDBY switch was set to ON (not implemented)\n"); - - /* TODO: Put board in standby mode */ -} - -static void standby_off(void *data) -{ - printk(KERN_WARNING - "STANDBY switch was set to OFF (not implemented)\n"); - - /* TODO: Take out of standby mode */ -} - -static struct hwbutton_interrupt softreset_sw = { - .name = "Softreset button", - .irq = MSP_INT_EXT0, - .eirq = 0, - .initial_state = HWBUTTON_HI, - .handle_hi = softreset_release, - .handle_lo = softreset_push, - .data = NULL, -}; - -static struct hwbutton_interrupt standby_sw = { - .name = "Standby switch", - .irq = MSP_INT_EXT1, - .eirq = 1, - .initial_state = HWBUTTON_HI, - .handle_hi = standby_off, - .handle_lo = standby_on, - .data = NULL, -}; -#endif /* CONFIG_PMC_MSP7120_GW */ - -static irqreturn_t hwbutton_handler(int irq, void *data) -{ - struct hwbutton_interrupt *hirq = data; - unsigned long cic_ext = *CIC_EXT_CFG_REG; - - if (CIC_EXT_IS_ACTIVE_HI(cic_ext, hirq->eirq)) { - /* Interrupt: pin is now HI */ - CIC_EXT_SET_ACTIVE_LO(cic_ext, hirq->eirq); - hirq->handle_hi(hirq->data); - } else { - /* Interrupt: pin is now LO */ - CIC_EXT_SET_ACTIVE_HI(cic_ext, hirq->eirq); - hirq->handle_lo(hirq->data); - } - - /* - * Invert the POLARITY of this level interrupt to ack the interrupt - * Thus next state change will invoke the opposite message - */ - *CIC_EXT_CFG_REG = cic_ext; - - return IRQ_HANDLED; -} - -static int msp_hwbutton_register(struct hwbutton_interrupt *hirq) -{ - unsigned long cic_ext; - - if (hirq->handle_hi == NULL || hirq->handle_lo == NULL) - return -EINVAL; - - cic_ext = *CIC_EXT_CFG_REG; - CIC_EXT_SET_TRIGGER_LEVEL(cic_ext, hirq->eirq); - if (hirq->initial_state == HWBUTTON_HI) - CIC_EXT_SET_ACTIVE_LO(cic_ext, hirq->eirq); - else - CIC_EXT_SET_ACTIVE_HI(cic_ext, hirq->eirq); - *CIC_EXT_CFG_REG = cic_ext; - - return request_irq(hirq->irq, hwbutton_handler, 0, - hirq->name, hirq); -} - -static int __init msp_hwbutton_setup(void) -{ -#ifdef CONFIG_PMC_MSP7120_GW - msp_hwbutton_register(&softreset_sw); - msp_hwbutton_register(&standby_sw); -#endif - return 0; -} - -subsys_initcall(msp_hwbutton_setup); diff --git a/arch/mips/pmcs-msp71xx/msp_irq.c 
b/arch/mips/pmcs-msp71xx/msp_irq.c deleted file mode 100644 index d525cc931d89..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_irq.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * IRQ vector handles - * - * Copyright (C) 1995, 1996, 1997, 2003 by Ralf Baechle - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/irq.h> -#include <linux/interrupt.h> -#include <linux/ptrace.h> -#include <linux/time.h> - -#include <asm/irq_cpu.h> -#include <asm/setup.h> - -#include <msp_int.h> - -/* SLP bases systems */ -extern void msp_slp_irq_init(void); -extern void msp_slp_irq_dispatch(void); - -/* CIC based systems */ -extern void msp_cic_irq_init(void); -extern void msp_cic_irq_dispatch(void); - -/* VSMP support init */ -extern void msp_vsmp_int_init(void); - -/* vectored interrupt implementation */ - -/* SW0/1 interrupts are used for SMP */ -static inline void mac0_int_dispatch(void) { do_IRQ(MSP_INT_MAC0); } -static inline void mac1_int_dispatch(void) { do_IRQ(MSP_INT_MAC1); } -static inline void mac2_int_dispatch(void) { do_IRQ(MSP_INT_SAR); } -static inline void usb_int_dispatch(void) { do_IRQ(MSP_INT_USB); } -static inline void sec_int_dispatch(void) { do_IRQ(MSP_INT_SEC); } - -/* - * The PMC-Sierra MSP interrupts are arranged in a 3 level cascaded - * hierarchical system. The first level are the direct MIPS interrupts - * and are assigned the interrupt range 0-7. The second level is the SLM - * interrupt controller and is assigned the range 8-39. The third level - * comprises the Peripherial block, the PCI block, the PCI MSI block and - * the SLP. The PCI interrupts and the SLP errors are handled by the - * relevant subsystems so the core interrupt code needs only concern - * itself with the Peripheral block. These are assigned interrupts in - * the range 40-71. - */ - -asmlinkage void plat_irq_dispatch(void) -{ - u32 pending; - - pending = read_c0_status() & read_c0_cause(); - - /* - * jump to the correct interrupt routine - * These are arranged in priority order and the timer - * comes first! 
- */ - -#ifdef CONFIG_IRQ_MSP_CIC /* break out the CIC stuff for now */ - if (pending & C_IRQ4) /* do the peripherals first, that's the timer */ - msp_cic_irq_dispatch(); - - else if (pending & C_IRQ0) - do_IRQ(MSP_INT_MAC0); - - else if (pending & C_IRQ1) - do_IRQ(MSP_INT_MAC1); - - else if (pending & C_IRQ2) - do_IRQ(MSP_INT_USB); - - else if (pending & C_IRQ3) - do_IRQ(MSP_INT_SAR); - - else if (pending & C_IRQ5) - do_IRQ(MSP_INT_SEC); - -#else - if (pending & C_IRQ5) - do_IRQ(MSP_INT_TIMER); - - else if (pending & C_IRQ0) - do_IRQ(MSP_INT_MAC0); - - else if (pending & C_IRQ1) - do_IRQ(MSP_INT_MAC1); - - else if (pending & C_IRQ3) - do_IRQ(MSP_INT_VE); - - else if (pending & C_IRQ4) - msp_slp_irq_dispatch(); -#endif - - else if (pending & C_SW0) /* do software after hardware */ - do_IRQ(MSP_INT_SW0); - - else if (pending & C_SW1) - do_IRQ(MSP_INT_SW1); -} - -void __init arch_init_irq(void) -{ - /* assume we'll be using vectored interrupt mode except in UP mode*/ -#ifdef CONFIG_MIPS_MT - BUG_ON(!cpu_has_vint); -#endif - /* initialize the 1st-level CPU based interrupt controller */ - mips_cpu_irq_init(); - -#ifdef CONFIG_IRQ_MSP_CIC - msp_cic_irq_init(); -#ifdef CONFIG_MIPS_MT - set_vi_handler(MSP_INT_CIC, msp_cic_irq_dispatch); - set_vi_handler(MSP_INT_MAC0, mac0_int_dispatch); - set_vi_handler(MSP_INT_MAC1, mac1_int_dispatch); - set_vi_handler(MSP_INT_SAR, mac2_int_dispatch); - set_vi_handler(MSP_INT_USB, usb_int_dispatch); - set_vi_handler(MSP_INT_SEC, sec_int_dispatch); -#ifdef CONFIG_MIPS_MT_SMP - msp_vsmp_int_init(); -#endif /* CONFIG_MIPS_MT_SMP */ -#endif /* CONFIG_MIPS_MT */ - /* setup the cascaded interrupts */ - if (request_irq(MSP_INT_CIC, no_action, IRQF_NO_THREAD, - "MSP CIC cascade", NULL)) - pr_err("Failed to register MSP CIC cascade interrupt\n"); - if (request_irq(MSP_INT_PER, no_action, IRQF_NO_THREAD, - "MSP PER cascade", NULL)) - pr_err("Failed to register MSP PER cascade interrupt\n"); - -#else - /* - * Setup the 2nd-level SLP register based interrupt controller. - * VSMP support support is not enabled for SLP. - */ - msp_slp_irq_init(); - - /* setup the cascaded SLP/PER interrupts */ - if (request_irq(MSP_INT_SLP, no_action, IRQF_NO_THREAD, - "MSP CIC cascade", NULL)) - pr_err("Failed to register MSP CIC cascade interrupt\n"); - if (request_irq(MSP_INT_PER, no_action, IRQF_NO_THREAD, - "MSP PER cascade", NULL)) - pr_err("Failed to register MSP PER cascade interrupt\n"); -#endif -} diff --git a/arch/mips/pmcs-msp71xx/msp_irq_cic.c b/arch/mips/pmcs-msp71xx/msp_irq_cic.c deleted file mode 100644 index 0706010cc99f..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_irq_cic.c +++ /dev/null @@ -1,208 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2010 PMC-Sierra, Inc, derived from irq_cpu.c - * - * This file define the irq handler for MSP CIC subsystem interrupts. - */ - -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/kernel.h> -#include <linux/bitops.h> -#include <linux/irq.h> - -#include <asm/mipsregs.h> - -#include <msp_cic_int.h> -#include <msp_regs.h> - -/* - * External API - */ -extern void msp_per_irq_init(void); -extern void msp_per_irq_dispatch(void); - - -/* - * Convenience Macro. Should be somewhere generic. 
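plat_irq_dispatch() above is a plain priority chain: it intersects the cause (asserted) and status (enabled) registers once, then tests bits in a fixed order with the timer first. A hardware-free sketch of that shape, with invented names and bit positions:

    #include <stdint.h>
    #include <stdio.h>

    #define IRQ_TIMER	(1u << 5)
    #define IRQ_MAC0	(1u << 0)
    #define IRQ_MAC1	(1u << 1)
    #define IRQ_SW0	(1u << 8)

    static void dispatch(uint32_t status, uint32_t cause)
    {
    	uint32_t pending = status & cause;	/* enabled AND asserted */

    	/* fixed priority: timer first, software interrupts last */
    	if (pending & IRQ_TIMER)
    		puts("timer");
    	else if (pending & IRQ_MAC0)
    		puts("mac0");
    	else if (pending & IRQ_MAC1)
    		puts("mac1");
    	else if (pending & IRQ_SW0)
    		puts("sw0");
    	else
    		puts("spurious");
    }

    int main(void)
    {
    	dispatch(0xffffffff, IRQ_TIMER | IRQ_MAC1);	/* prints "timer" */
    	dispatch(~IRQ_TIMER, IRQ_TIMER | IRQ_MAC1);	/* timer masked: "mac1" */
    	return 0;
    }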
- */ -#define get_current_vpe() \ - ((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE) - -#ifdef CONFIG_SMP - -#define LOCK_VPE(flags, mtflags) \ -do { \ - local_irq_save(flags); \ - mtflags = dmt(); \ -} while (0) - -#define UNLOCK_VPE(flags, mtflags) \ -do { \ - emt(mtflags); \ - local_irq_restore(flags);\ -} while (0) - -#define LOCK_CORE(flags, mtflags) \ -do { \ - local_irq_save(flags); \ - mtflags = dvpe(); \ -} while (0) - -#define UNLOCK_CORE(flags, mtflags) \ -do { \ - evpe(mtflags); \ - local_irq_restore(flags);\ -} while (0) - -#else - -#define LOCK_VPE(flags, mtflags) -#define UNLOCK_VPE(flags, mtflags) -#endif - -/* ensure writes to cic are completed */ -static inline void cic_wmb(void) -{ - const volatile void __iomem *cic_mem = CIC_VPE0_MSK_REG; - volatile u32 dummy_read; - - wmb(); - dummy_read = __raw_readl(cic_mem); - dummy_read++; -} - -static void unmask_cic_irq(struct irq_data *d) -{ - volatile u32 *cic_msk_reg = CIC_VPE0_MSK_REG; - int vpe; -#ifdef CONFIG_SMP - unsigned int mtflags; - unsigned long flags; - - /* - * Make sure we have IRQ affinity. It may have changed while - * we were processing the IRQ. - */ - if (!cpumask_test_cpu(smp_processor_id(), - irq_data_get_affinity_mask(d))) - return; -#endif - - vpe = get_current_vpe(); - LOCK_VPE(flags, mtflags); - cic_msk_reg[vpe] |= (1 << (d->irq - MSP_CIC_INTBASE)); - UNLOCK_VPE(flags, mtflags); - cic_wmb(); -} - -static void mask_cic_irq(struct irq_data *d) -{ - volatile u32 *cic_msk_reg = CIC_VPE0_MSK_REG; - int vpe = get_current_vpe(); -#ifdef CONFIG_SMP - unsigned long flags, mtflags; -#endif - LOCK_VPE(flags, mtflags); - cic_msk_reg[vpe] &= ~(1 << (d->irq - MSP_CIC_INTBASE)); - UNLOCK_VPE(flags, mtflags); - cic_wmb(); -} -static void msp_cic_irq_ack(struct irq_data *d) -{ - mask_cic_irq(d); - /* - * Only really necessary for 18, 16-14 and sometimes 3:0 - * (since these can be edge sensitive) but it doesn't - * hurt for the others - */ - *CIC_STS_REG = (1 << (d->irq - MSP_CIC_INTBASE)); -} - -/* Note: Limiting to VSMP. */ - -#ifdef CONFIG_MIPS_MT_SMP -static int msp_cic_irq_set_affinity(struct irq_data *d, - const struct cpumask *cpumask, bool force) -{ - int cpu; - unsigned long flags; - unsigned int mtflags; - unsigned long imask = (1 << (d->irq - MSP_CIC_INTBASE)); - volatile u32 *cic_mask = (volatile u32 *)CIC_VPE0_MSK_REG; - - /* timer balancing should be disabled in kernel code */ - BUG_ON(d->irq == MSP_INT_VPE0_TIMER || d->irq == MSP_INT_VPE1_TIMER); - - LOCK_CORE(flags, mtflags); - /* enable if any of each VPE's TCs require this IRQ */ - for_each_online_cpu(cpu) { - if (cpumask_test_cpu(cpu, cpumask)) - cic_mask[cpu] |= imask; - else - cic_mask[cpu] &= ~imask; - - } - - UNLOCK_CORE(flags, mtflags); - return 0; - -} -#endif - -static struct irq_chip msp_cic_irq_controller = { - .name = "MSP_CIC", - .irq_mask = mask_cic_irq, - .irq_mask_ack = msp_cic_irq_ack, - .irq_unmask = unmask_cic_irq, - .irq_ack = msp_cic_irq_ack, -#ifdef CONFIG_MIPS_MT_SMP - .irq_set_affinity = msp_cic_irq_set_affinity, -#endif -}; - -void __init msp_cic_irq_init(void) -{ - int i; - /* Mask/clear interrupts. */ - *CIC_VPE0_MSK_REG = 0x00000000; - *CIC_VPE1_MSK_REG = 0x00000000; - *CIC_STS_REG = 0xFFFFFFFF; - /* - * The MSP7120 RG and EVBD boards use IRQ[6:4] for PCI. - * These inputs map to EXT_INT_POL[6:4] inside the CIC. - * They are to be active low, level sensitive. 
- */ - *CIC_EXT_CFG_REG &= 0xFFFF8F8F; - - /* initialize all the IRQ descriptors */ - for (i = MSP_CIC_INTBASE ; i < MSP_CIC_INTBASE + 32 ; i++) { - irq_set_chip_and_handler(i, &msp_cic_irq_controller, - handle_level_irq); - } - - /* Initialize the PER interrupt sub-system */ - msp_per_irq_init(); -} - -/* CIC masked by CIC vector processing before dispatch called */ -void msp_cic_irq_dispatch(void) -{ - volatile u32 *cic_msk_reg = (volatile u32 *)CIC_VPE0_MSK_REG; - u32 cic_mask; - u32 pending; - int cic_status = *CIC_STS_REG; - cic_mask = cic_msk_reg[get_current_vpe()]; - pending = cic_status & cic_mask; - if (pending & (1 << (MSP_INT_VPE0_TIMER - MSP_CIC_INTBASE))) { - do_IRQ(MSP_INT_VPE0_TIMER); - } else if (pending & (1 << (MSP_INT_VPE1_TIMER - MSP_CIC_INTBASE))) { - do_IRQ(MSP_INT_VPE1_TIMER); - } else if (pending & (1 << (MSP_INT_PER - MSP_CIC_INTBASE))) { - msp_per_irq_dispatch(); - } else if (pending) { - do_IRQ(ffs(pending) + MSP_CIC_INTBASE - 1); - } else{ - spurious_interrupt(); - } -} diff --git a/arch/mips/pmcs-msp71xx/msp_irq_per.c b/arch/mips/pmcs-msp71xx/msp_irq_per.c deleted file mode 100644 index b284412b2923..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_irq_per.c +++ /dev/null @@ -1,127 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2010 PMC-Sierra, Inc, derived from irq_cpu.c - * - * This file define the irq handler for MSP PER subsystem interrupts. - */ - -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/kernel.h> -#include <linux/spinlock.h> -#include <linux/bitops.h> - -#include <asm/mipsregs.h> - -#include <msp_cic_int.h> -#include <msp_regs.h> - - -/* - * Convenience Macro. Should be somewhere generic. - */ -#define get_current_vpe() \ - ((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE) - -#ifdef CONFIG_SMP -/* - * The PER registers must be protected from concurrent access. - */ - -static DEFINE_SPINLOCK(per_lock); -#endif - -/* ensure writes to per are completed */ - -static inline void per_wmb(void) -{ - const volatile void __iomem *per_mem = PER_INT_MSK_REG; - volatile u32 dummy_read; - - wmb(); - dummy_read = __raw_readl(per_mem); - dummy_read++; -} - -static inline void unmask_per_irq(struct irq_data *d) -{ -#ifdef CONFIG_SMP - unsigned long flags; - spin_lock_irqsave(&per_lock, flags); - *PER_INT_MSK_REG |= (1 << (d->irq - MSP_PER_INTBASE)); - spin_unlock_irqrestore(&per_lock, flags); -#else - *PER_INT_MSK_REG |= (1 << (d->irq - MSP_PER_INTBASE)); -#endif - per_wmb(); -} - -static inline void mask_per_irq(struct irq_data *d) -{ -#ifdef CONFIG_SMP - unsigned long flags; - spin_lock_irqsave(&per_lock, flags); - *PER_INT_MSK_REG &= ~(1 << (d->irq - MSP_PER_INTBASE)); - spin_unlock_irqrestore(&per_lock, flags); -#else - *PER_INT_MSK_REG &= ~(1 << (d->irq - MSP_PER_INTBASE)); -#endif - per_wmb(); -} - -static inline void msp_per_irq_ack(struct irq_data *d) -{ - mask_per_irq(d); - /* - * In the PER interrupt controller, only bits 11 and 10 - * are write-to-clear, (SPI TX complete, SPI RX complete). - * It does nothing for any others. - */ - *PER_INT_STS_REG = (1 << (d->irq - MSP_PER_INTBASE)); -} - -#ifdef CONFIG_SMP -static int msp_per_irq_set_affinity(struct irq_data *d, - const struct cpumask *affinity, bool force) -{ - /* WTF is this doing ????? 
*/ - unmask_per_irq(d); - return 0; -} -#endif - -static struct irq_chip msp_per_irq_controller = { - .name = "MSP_PER", - .irq_enable = unmask_per_irq, - .irq_disable = mask_per_irq, - .irq_ack = msp_per_irq_ack, -#ifdef CONFIG_SMP - .irq_set_affinity = msp_per_irq_set_affinity, -#endif -}; - -void __init msp_per_irq_init(void) -{ - int i; - /* Mask/clear interrupts. */ - *PER_INT_MSK_REG = 0x00000000; - *PER_INT_STS_REG = 0xFFFFFFFF; - /* initialize all the IRQ descriptors */ - for (i = MSP_PER_INTBASE; i < MSP_PER_INTBASE + 32; i++) { - irq_set_chip(i, &msp_per_irq_controller); - } -} - -void msp_per_irq_dispatch(void) -{ - u32 per_mask = *PER_INT_MSK_REG; - u32 per_status = *PER_INT_STS_REG; - u32 pending; - - pending = per_status & per_mask; - if (pending) { - do_IRQ(ffs(pending) + MSP_PER_INTBASE - 1); - } else { - spurious_interrupt(); - } -} diff --git a/arch/mips/pmcs-msp71xx/msp_irq_slp.c b/arch/mips/pmcs-msp71xx/msp_irq_slp.c deleted file mode 100644 index 097a5fd3b06b..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_irq_slp.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * This file define the irq handler for MSP SLM subsystem interrupts. - * - * Copyright 2005-2006 PMC-Sierra, Inc, derived from irq_cpu.c - * Author: Andrew Hughes, Andrew_Hughes@pmc-sierra.com - */ - -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/kernel.h> -#include <linux/bitops.h> - -#include <asm/mipsregs.h> - -#include <msp_slp_int.h> -#include <msp_regs.h> - -static inline void unmask_msp_slp_irq(struct irq_data *d) -{ - unsigned int irq = d->irq; - - /* check for PER interrupt range */ - if (irq < MSP_PER_INTBASE) - *SLP_INT_MSK_REG |= (1 << (irq - MSP_SLP_INTBASE)); - else - *PER_INT_MSK_REG |= (1 << (irq - MSP_PER_INTBASE)); -} - -static inline void mask_msp_slp_irq(struct irq_data *d) -{ - unsigned int irq = d->irq; - - /* check for PER interrupt range */ - if (irq < MSP_PER_INTBASE) - *SLP_INT_MSK_REG &= ~(1 << (irq - MSP_SLP_INTBASE)); - else - *PER_INT_MSK_REG &= ~(1 << (irq - MSP_PER_INTBASE)); -} - -/* - * While we ack the interrupt interrupts are disabled and thus we don't need - * to deal with concurrency issues. Same for msp_slp_irq_end. - */ -static inline void ack_msp_slp_irq(struct irq_data *d) -{ - unsigned int irq = d->irq; - - /* check for PER interrupt range */ - if (irq < MSP_PER_INTBASE) - *SLP_INT_STS_REG = (1 << (irq - MSP_SLP_INTBASE)); - else - *PER_INT_STS_REG = (1 << (irq - MSP_PER_INTBASE)); -} - -static struct irq_chip msp_slp_irq_controller = { - .name = "MSP_SLP", - .irq_ack = ack_msp_slp_irq, - .irq_mask = mask_msp_slp_irq, - .irq_unmask = unmask_msp_slp_irq, -}; - -void __init msp_slp_irq_init(void) -{ - int i; - - /* Mask/clear interrupts. 
*/ - *SLP_INT_MSK_REG = 0x00000000; - *PER_INT_MSK_REG = 0x00000000; - *SLP_INT_STS_REG = 0xFFFFFFFF; - *PER_INT_STS_REG = 0xFFFFFFFF; - - /* initialize all the IRQ descriptors */ - for (i = MSP_SLP_INTBASE; i < MSP_PER_INTBASE + 32; i++) - irq_set_chip_and_handler(i, &msp_slp_irq_controller, - handle_level_irq); -} - -void msp_slp_irq_dispatch(void) -{ - u32 pending; - int intbase; - - intbase = MSP_SLP_INTBASE; - pending = *SLP_INT_STS_REG & *SLP_INT_MSK_REG; - - /* check for PER interrupt */ - if (pending == (1 << (MSP_INT_PER - MSP_SLP_INTBASE))) { - intbase = MSP_PER_INTBASE; - pending = *PER_INT_STS_REG & *PER_INT_MSK_REG; - } - - /* check for spurious interrupt */ - if (pending == 0x00000000) { - printk(KERN_ERR "Spurious %s interrupt?\n", - (intbase == MSP_SLP_INTBASE) ? "SLP" : "PER"); - return; - } - - /* dispatch the irq */ - do_IRQ(ffs(pending) + intbase - 1); -} diff --git a/arch/mips/pmcs-msp71xx/msp_pci.c b/arch/mips/pmcs-msp71xx/msp_pci.c deleted file mode 100644 index 428dea23c35c..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_pci.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * The setup file for PCI related hardware on PMC-Sierra MSP processors. - * - * Copyright 2005-2006 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/init.h> - -#include <msp_prom.h> -#include <msp_regs.h> - -extern void msp_pci_init(void); - -static int __init msp_pci_setup(void) -{ -#if 0 /* Linux 2.6 initialization code to be completed */ - if (getdeviceid() & DEV_ID_SINGLE_PC) { - /* If single card mode */ - slmRegs *sreg = (slmRegs *) SREG_BASE; - - sreg->single_pc_enable = SINGLE_PCCARD; - } -#endif - - msp_pci_init(); - - return 0; -} - -subsys_initcall(msp_pci_setup); diff --git a/arch/mips/pmcs-msp71xx/msp_prom.c b/arch/mips/pmcs-msp71xx/msp_prom.c deleted file mode 100644 index 800a21b8b8b0..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_prom.c +++ /dev/null @@ -1,513 +0,0 @@ -/* - * BRIEF MODULE DESCRIPTION - * PROM library initialisation code, assuming a version of - * pmon is the boot code. - * - * Copyright 2000,2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This file was derived from Carsten Langgaard's - * arch/mips/mips-boards/xx files. - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. 
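The CIC, PER and SLP dispatchers above all finish the same way: the lowest pending bit is converted to a Linux IRQ number with ffs() plus an interrupt base. The standalone sketch below shows just that mapping (the base value is invented):

    #include <stdio.h>
    #include <strings.h>	/* ffs(), POSIX */

    #define FAKE_INTBASE	40	/* illustrative, like the MSP_*_INTBASE values */

    /* Convert the lowest set pending bit to an IRQ number, as the removed
     * msp_*_irq_dispatch() helpers do before calling do_IRQ(). */
    static int pending_to_irq(unsigned int pending)
    {
    	if (!pending)
    		return -1;			/* spurious */
    	return ffs(pending) + FAKE_INTBASE - 1;	/* bit 0 maps to IRQ 40 */
    }

    int main(void)
    {
    	printf("%d\n", pending_to_irq(0x1));	/* 40 */
    	printf("%d\n", pending_to_irq(0x8));	/* bit 3 -> 43 */
    	printf("%d\n", pending_to_irq(0));	/* -1 */
    	return 0;
    }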
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/export.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/slab.h> - -#include <asm/addrspace.h> -#include <asm/bootinfo.h> -#include <asm-generic/sections.h> -#include <asm/page.h> - -#include <msp_prom.h> -#include <msp_regs.h> - -/* global PROM environment variables and pointers */ -int prom_argc; -char **prom_argv, **prom_envp; -int *prom_vec; - -/* debug flag */ -int init_debug = 1; - -/* memory blocks */ -struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS]; - -#define MAX_PROM_MEM 5 -static phys_addr_t prom_mem_base[MAX_PROM_MEM] __initdata; -static phys_addr_t prom_mem_size[MAX_PROM_MEM] __initdata; -static unsigned int nr_prom_mem __initdata; - -/* default feature sets */ -static char msp_default_features[] = -#if defined(CONFIG_PMC_MSP4200_EVAL) \ - || defined(CONFIG_PMC_MSP4200_GW) - "ERER"; -#elif defined(CONFIG_PMC_MSP7120_EVAL) \ - || defined(CONFIG_PMC_MSP7120_GW) - "EMEMSP"; -#elif defined(CONFIG_PMC_MSP7120_FPGA) - "EMEM"; -#endif - -/* conversion functions */ -static inline unsigned char str2hexnum(unsigned char c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - return 0; /* foo */ -} - -int str2eaddr(unsigned char *ea, unsigned char *str) -{ - int index = 0; - unsigned char num = 0; - - while (*str != '\0') { - if ((*str == '.') || (*str == ':')) { - ea[index++] = num; - num = 0; - str++; - } else { - num = num << 4; - num |= str2hexnum(*str++); - } - } - - if (index == 5) { - ea[index++] = num; - return 0; - } else - return -1; -} -EXPORT_SYMBOL(str2eaddr); - -static inline unsigned long str2hex(unsigned char *str) -{ - int value = 0; - - while (*str) { - value = value << 4; - value |= str2hexnum(*str++); - } - - return value; -} - -/* function to query the system information */ -const char *get_system_type(void) -{ -#if defined(CONFIG_PMC_MSP4200_EVAL) - return "PMC-Sierra MSP4200 Eval Board"; -#elif defined(CONFIG_PMC_MSP4200_GW) - return "PMC-Sierra MSP4200 VoIP Gateway"; -#elif defined(CONFIG_PMC_MSP7120_EVAL) - return "PMC-Sierra MSP7120 Eval Board"; -#elif defined(CONFIG_PMC_MSP7120_GW) - return "PMC-Sierra MSP7120 Residential Gateway"; -#elif defined(CONFIG_PMC_MSP7120_FPGA) - return "PMC-Sierra MSP7120 FPGA"; -#else - 
#error "What is the type of *your* MSP?" -#endif -} - -int get_ethernet_addr(char *ethaddr_name, char *ethernet_addr) -{ - char *ethaddr_str; - - ethaddr_str = prom_getenv(ethaddr_name); - if (!ethaddr_str) { - printk(KERN_WARNING "%s not set in boot prom\n", ethaddr_name); - return -1; - } - - if (str2eaddr(ethernet_addr, ethaddr_str) == -1) { - printk(KERN_WARNING "%s badly formatted-<%s>\n", - ethaddr_name, ethaddr_str); - return -1; - } - - if (init_debug > 1) { - int i; - printk(KERN_DEBUG "get_ethernet_addr: for %s ", ethaddr_name); - for (i = 0; i < 5; i++) - printk(KERN_DEBUG "%02x:", - (unsigned char)*(ethernet_addr+i)); - printk(KERN_DEBUG "%02x\n", *(ethernet_addr+i)); - } - - return 0; -} -EXPORT_SYMBOL(get_ethernet_addr); - -static char *get_features(void) -{ - char *feature = prom_getenv(FEATURES); - - if (feature == NULL) { - /* default features based on MACHINE_TYPE */ - feature = msp_default_features; - } - - return feature; -} - -static char test_feature(char c) -{ - char *feature = get_features(); - - while (*feature) { - if (*feature++ == c) - return *feature; - feature++; - } - - return FEATURE_NOEXIST; -} - -unsigned long get_deviceid(void) -{ - char *deviceid = prom_getenv(DEVICEID); - - if (deviceid == NULL) - return *DEV_ID_REG; - else - return str2hex(deviceid); -} - -char identify_pci(void) -{ - return test_feature(PCI_KEY); -} -EXPORT_SYMBOL(identify_pci); - -char identify_pcimux(void) -{ - return test_feature(PCIMUX_KEY); -} - -char identify_sec(void) -{ - return test_feature(SEC_KEY); -} -EXPORT_SYMBOL(identify_sec); - -char identify_spad(void) -{ - return test_feature(SPAD_KEY); -} -EXPORT_SYMBOL(identify_spad); - -char identify_tdm(void) -{ - return test_feature(TDM_KEY); -} -EXPORT_SYMBOL(identify_tdm); - -char identify_zsp(void) -{ - return test_feature(ZSP_KEY); -} -EXPORT_SYMBOL(identify_zsp); - -static char identify_enetfeature(char key, unsigned long interface_num) -{ - char *feature = get_features(); - - while (*feature) { - if (*feature++ == key && interface_num-- == 0) - return *feature; - feature++; - } - - return FEATURE_NOEXIST; -} - -char identify_enet(unsigned long interface_num) -{ - return identify_enetfeature(ENET_KEY, interface_num); -} -EXPORT_SYMBOL(identify_enet); - -char identify_enetTxD(unsigned long interface_num) -{ - return identify_enetfeature(ENETTXD_KEY, interface_num); -} -EXPORT_SYMBOL(identify_enetTxD); - -unsigned long identify_family(void) -{ - unsigned long deviceid; - - deviceid = get_deviceid(); - - return deviceid & CPU_DEVID_FAMILY; -} -EXPORT_SYMBOL(identify_family); - -unsigned long identify_revision(void) -{ - unsigned long deviceid; - - deviceid = get_deviceid(); - - return deviceid & CPU_DEVID_REVISION; -} -EXPORT_SYMBOL(identify_revision); - -/* PROM environment functions */ -char *prom_getenv(char *env_name) -{ - /* - * Return a pointer to the given environment variable. prom_envp - * points to a null terminated array of pointers to variables. 
- * Environment variables are stored in the form of "memsize=64" - */ - - char **var = prom_envp; - int i = strlen(env_name); - - while (*var) { - if (strncmp(env_name, *var, i) == 0) { - return *var + strlen(env_name) + 1; - } - var++; - } - - return NULL; -} - -/* PROM commandline functions */ -void __init prom_init_cmdline(void) -{ - char *cp; - int actr; - - actr = 1; /* Always ignore argv[0] */ - - cp = &(arcs_cmdline[0]); - while (actr < prom_argc) { - strcpy(cp, prom_argv[actr]); - cp += strlen(prom_argv[actr]); - *cp++ = ' '; - actr++; - } - if (cp != &(arcs_cmdline[0])) /* get rid of trailing space */ - --cp; - *cp = '\0'; -} - -/* memory allocation functions */ -static int __init prom_memtype_classify(unsigned int type) -{ - switch (type) { - case yamon_free: - return BOOT_MEM_RAM; - case yamon_prom: - return BOOT_MEM_ROM_DATA; - default: - return BOOT_MEM_RESERVED; - } -} - -void __init prom_meminit(void) -{ - struct prom_pmemblock *p; - - p = prom_getmdesc(); - - while (p->size) { - long type; - unsigned long base, size; - - type = prom_memtype_classify(p->type); - base = p->base; - size = p->size; - - add_memory_region(base, size, type); - p++; - - if (type == BOOT_MEM_ROM_DATA) { - if (nr_prom_mem >= MAX_PROM_MEM) { - pr_err("Too many ROM DATA regions"); - continue; - } - prom_mem_base[nr_prom_mem] = base; - prom_mem_size[nr_prom_mem] = size; - nr_prom_mem++; - } - } -} - -void __init prom_free_prom_memory(void) -{ - int argc; - char **argv; - char **envp; - char *ptr; - int len = 0; - int i; - - /* - * preserve environment variables and command line from pmon/bbload - * first preserve the command line - */ - for (argc = 0; argc < prom_argc; argc++) { - len += sizeof(char *); /* length of pointer */ - len += strlen(prom_argv[argc]) + 1; /* length of string */ - } - len += sizeof(char *); /* plus length of null pointer */ - - argv = kmalloc(len, GFP_KERNEL); - ptr = (char *) &argv[prom_argc + 1]; /* strings follow array */ - - for (argc = 0; argc < prom_argc; argc++) { - argv[argc] = ptr; - strcpy(ptr, prom_argv[argc]); - ptr += strlen(prom_argv[argc]) + 1; - } - argv[prom_argc] = NULL; /* end array with null pointer */ - prom_argv = argv; - - /* next preserve the environment variables */ - len = 0; - i = 0; - for (envp = prom_envp; *envp != NULL; envp++) { - i++; /* count number of environment variables */ - len += sizeof(char *); /* length of pointer */ - len += strlen(*envp) + 1; /* length of string */ - } - len += sizeof(char *); /* plus length of null pointer */ - - envp = kmalloc(len, GFP_KERNEL); - ptr = (char *) &envp[i+1]; - - for (argc = 0; argc < i; argc++) { - envp[argc] = ptr; - strcpy(ptr, prom_envp[argc]); - ptr += strlen(prom_envp[argc]) + 1; - } - envp[i] = NULL; /* end array with null pointer */ - prom_envp = envp; - - for (i = 0; i < nr_prom_mem; i++) { - free_init_pages("prom memory", - prom_mem_base[i], prom_mem_base[i] + prom_mem_size[i]); - } -} - -struct prom_pmemblock *__init prom_getmdesc(void) -{ - static char memsz_env[] __initdata = "memsize"; - static char heaptop_env[] __initdata = "heaptop"; - char *str; - unsigned int memsize; - unsigned int heaptop; - int i; - - str = prom_getenv(memsz_env); - if (!str) { - ppfinit("memsize not set in boot prom, " - "set to default (32Mb)\n"); - memsize = 0x02000000; - } else { - memsize = simple_strtol(str, NULL, 0); - - if (memsize == 0) { - /* if memsize is a bad size, use reasonable default */ - memsize = 0x02000000; - } - - /* convert to physical address (removing caching bits, etc) */ - memsize = 
CPHYSADDR(memsize); - } - - str = prom_getenv(heaptop_env); - if (!str) { - heaptop = CPHYSADDR((u32)&_text); - ppfinit("heaptop not set in boot prom, " - "set to default 0x%08x\n", heaptop); - } else { - heaptop = simple_strtol(str, NULL, 16); - if (heaptop == 0) { - /* heaptop conversion bad, might have 0xValue */ - heaptop = simple_strtol(str, NULL, 0); - - if (heaptop == 0) { - /* heaptop still bad, use reasonable default */ - heaptop = CPHYSADDR((u32)&_text); - } - } - - /* convert to physical address (removing caching bits, etc) */ - heaptop = CPHYSADDR((u32)heaptop); - } - - /* the base region */ - i = 0; - mdesc[i].type = BOOT_MEM_RESERVED; - mdesc[i].base = 0x00000000; - mdesc[i].size = PAGE_ALIGN(0x300 + 0x80); - /* jtag interrupt vector + sizeof vector */ - - /* PMON data */ - if (heaptop > mdesc[i].base + mdesc[i].size) { - i++; /* 1 */ - mdesc[i].type = BOOT_MEM_ROM_DATA; - mdesc[i].base = mdesc[i-1].base + mdesc[i-1].size; - mdesc[i].size = heaptop - mdesc[i].base; - } - - /* end of PMON data to start of kernel -- probably zero .. */ - if (heaptop != CPHYSADDR((u32)_text)) { - i++; /* 2 */ - mdesc[i].type = BOOT_MEM_RAM; - mdesc[i].base = heaptop; - mdesc[i].size = CPHYSADDR((u32)_text) - mdesc[i].base; - } - - /* kernel proper */ - i++; /* 3 */ - mdesc[i].type = BOOT_MEM_RESERVED; - mdesc[i].base = CPHYSADDR((u32)_text); - mdesc[i].size = CPHYSADDR(PAGE_ALIGN((u32)_end)) - mdesc[i].base; - - /* Remainder of RAM -- under memsize */ - i++; /* 5 */ - mdesc[i].type = yamon_free; - mdesc[i].base = mdesc[i-1].base + mdesc[i-1].size; - mdesc[i].size = memsize - mdesc[i].base; - - return &mdesc[0]; -} diff --git a/arch/mips/pmcs-msp71xx/msp_serial.c b/arch/mips/pmcs-msp71xx/msp_serial.c deleted file mode 100644 index 940c684f6921..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_serial.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * The setup file for serial related hardware on PMC-Sierra MSP processors. - * - * Copyright 2005 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <linux/serial.h> -#include <linux/serial_core.h> -#include <linux/serial_reg.h> -#include <linux/slab.h> - -#include <asm/bootinfo.h> -#include <asm/io.h> -#include <asm/processor.h> -#include <asm/serial.h> -#include <linux/serial_8250.h> - -#include <msp_prom.h> -#include <msp_int.h> -#include <msp_regs.h> - -struct msp_uart_data { - int last_lcr; -}; - -static void msp_serial_out(struct uart_port *p, int offset, int value) -{ - struct msp_uart_data *d = p->private_data; - - if (offset == UART_LCR) - d->last_lcr = value; - - offset <<= p->regshift; - writeb(value, p->membase + offset); -} - -static unsigned int msp_serial_in(struct uart_port *p, int offset) -{ - offset <<= p->regshift; - - return readb(p->membase + offset); -} - -static int msp_serial_handle_irq(struct uart_port *p) -{ - struct msp_uart_data *d = p->private_data; - unsigned int iir = readb(p->membase + (UART_IIR << p->regshift)); - - if (serial8250_handle_irq(p, iir)) { - return 1; - } else if ((iir & UART_IIR_BUSY) == UART_IIR_BUSY) { - /* - * The DesignWare APB UART has an Busy Detect (0x07) interrupt - * meaning an LCR write attempt occurred while the UART was - * busy. The interrupt must be cleared by reading the UART - * status register (USR) and the LCR re-written. - * - * Note: MSP reserves 0x20 bytes of address space for the UART - * and the USR is mapped in a separate block at an offset of - * 0xc0 from the start of the UART. - */ - (void)readb(p->membase + 0xc0); - writeb(d->last_lcr, p->membase + (UART_LCR << p->regshift)); - - return 1; - } - - return 0; -} - -void __init msp_serial_setup(void) -{ - char *s; - char *endp; - struct uart_port up; - unsigned int uartclk; - - memset(&up, 0, sizeof(up)); - - /* Check if clock was specified in environment */ - s = prom_getenv("uartfreqhz"); - if(!(s && *s && (uartclk = simple_strtoul(s, &endp, 10)) && *endp == 0)) - uartclk = MSP_BASE_BAUD; - ppfinit("UART clock set to %d\n", uartclk); - - /* Initialize first serial port */ - up.mapbase = MSP_UART0_BASE; - up.membase = ioremap(up.mapbase, MSP_UART_REG_LEN); - up.irq = MSP_INT_UART0; - up.uartclk = uartclk; - up.regshift = 2; - up.iotype = UPIO_MEM; - up.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST; - up.type = PORT_16550A; - up.line = 0; - up.serial_out = msp_serial_out; - up.serial_in = msp_serial_in; - up.handle_irq = msp_serial_handle_irq; - up.private_data = kzalloc(sizeof(struct msp_uart_data), GFP_KERNEL); - if (!up.private_data) { - pr_err("failed to allocate uart private data\n"); - return; - } - if (early_serial_setup(&up)) { - kfree(up.private_data); - pr_err("Early serial init of port 0 failed\n"); - } - - /* Initialize the second serial port, if one exists */ - switch (mips_machtype) { - case MACH_MSP4200_EVAL: - case MACH_MSP4200_GW: - case MACH_MSP4200_FPGA: - case MACH_MSP7120_EVAL: - case MACH_MSP7120_GW: - case MACH_MSP7120_FPGA: - /* Enable UART1 on MSP4200 and MSP7120 */ - *GPIO_CFG2_REG = 0x00002299; - break; - - default: - return; /* No second serial port, good-bye. 
*/ - } - - up.mapbase = MSP_UART1_BASE; - up.membase = ioremap(up.mapbase, MSP_UART_REG_LEN); - up.irq = MSP_INT_UART1; - up.line = 1; - up.private_data = (void*)UART1_STATUS_REG; - if (early_serial_setup(&up)) { - kfree(up.private_data); - pr_err("Early serial init of port 1 failed\n"); - } -} diff --git a/arch/mips/pmcs-msp71xx/msp_setup.c b/arch/mips/pmcs-msp71xx/msp_setup.c deleted file mode 100644 index d1e59cec116e..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_setup.c +++ /dev/null @@ -1,228 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The generic setup file for PMC-Sierra MSP processors - * - * Copyright 2005-2007 PMC-Sierra, Inc, - * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net - */ - -#include <linux/delay.h> - -#include <asm/bootinfo.h> -#include <asm/cacheflush.h> -#include <asm/idle.h> -#include <asm/r4kcache.h> -#include <asm/reboot.h> -#include <asm/smp-ops.h> -#include <asm/time.h> - -#include <msp_prom.h> -#include <msp_regs.h> - -#if defined(CONFIG_PMC_MSP7120_GW) -#include <msp_regops.h> -#define MSP_BOARD_RESET_GPIO 9 -#endif - -extern void msp_serial_setup(void); - -#if defined(CONFIG_PMC_MSP7120_EVAL) || \ - defined(CONFIG_PMC_MSP7120_GW) || \ - defined(CONFIG_PMC_MSP7120_FPGA) -/* - * Performs the reset for MSP7120-based boards - */ -void msp7120_reset(void) -{ - void *start, *end, *iptr; - - /* Diasble all interrupts */ - local_irq_disable(); -#ifdef CONFIG_SYS_SUPPORTS_MULTITHREADING - dvpe(); -#endif - - /* Cache the reset code of this function */ - __asm__ __volatile__ ( - " .set push \n" - " .set arch=r4000 \n" - " la %0,startpoint \n" - " la %1,endpoint \n" - " .set pop \n" - : "=r" (start), "=r" (end) - : - ); - - for (iptr = (void *)((unsigned int)start & ~(L1_CACHE_BYTES - 1)); - iptr < end; iptr += L1_CACHE_BYTES) - cache_op(Fill, iptr); - - __asm__ __volatile__ ( - "startpoint: \n" - ); - - /* Put the DDRC into self-refresh mode */ - DDRC_INDIRECT_WRITE(DDRC_CTL(10), 0xb, 1 << 16); - - /* - * IMPORTANT! - * DO NOT do anything from here on out that might even - * think about fetching from RAM - i.e., don't call any - * non-inlined functions, and be VERY sure that any inline - * functions you do call do NOT access any sort of RAM - * anywhere! - */ - - /* Wait a bit for the DDRC to settle */ - mdelay(125); - -#if defined(CONFIG_PMC_MSP7120_GW) - /* - * Set GPIO 9 HI, (tied to board reset logic) - * GPIO 9 is the 4th GPIO of register 3 - * - * NOTE: We cannot use the higher-level msp_gpio_mode()/out() - * as GPIO char driver may not be enabled and it would look up - * data inRAM! - */ - set_value_reg32(GPIO_CFG3_REG, 0xf000, 0x8000); - set_reg32(GPIO_DATA3_REG, 8); - - /* - * In case GPIO9 doesn't reset the board (jumper configurable!) - * fallback to device reset below. 
- */ -#endif - /* Set bit 1 of the MSP7120 reset register */ - *RST_SET_REG = 0x00000001; - - __asm__ __volatile__ ( - "endpoint: \n" - ); -} -#endif - -void msp_restart(char *command) -{ - printk(KERN_WARNING "Now rebooting .......\n"); - -#if defined(CONFIG_PMC_MSP7120_EVAL) || \ - defined(CONFIG_PMC_MSP7120_GW) || \ - defined(CONFIG_PMC_MSP7120_FPGA) - msp7120_reset(); -#else - /* No chip-specific reset code, just jump to the ROM reset vector */ - set_c0_status(ST0_BEV | ST0_ERL); - change_c0_config(CONF_CM_CMASK, CONF_CM_UNCACHED); - __flush_cache_all(); - write_c0_wired(0); - - __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000)); -#endif -} - -void msp_halt(void) -{ - printk(KERN_WARNING "\n** You can safely turn off the power\n"); - while (1) - /* If possible call official function to get CPU WARs */ - if (cpu_wait) - (*cpu_wait)(); - else - __asm__(".set\tmips3\n\t" "wait\n\t" ".set\tmips0"); -} - -void msp_power_off(void) -{ - msp_halt(); -} - -void __init plat_mem_setup(void) -{ - _machine_restart = msp_restart; - _machine_halt = msp_halt; - pm_power_off = msp_power_off; -} - -void __init prom_init(void) -{ - unsigned long family; - unsigned long revision; - - prom_argc = fw_arg0; - prom_argv = (char **)fw_arg1; - prom_envp = (char **)fw_arg2; - - /* - * Someday we can use this with PMON2000 to get a - * platform call prom routines for output etc. without - * having to use grody hacks. For now it's unused. - * - * struct callvectors *cv = (struct callvectors *) fw_arg3; - */ - family = identify_family(); - revision = identify_revision(); - - switch (family) { - case FAMILY_FPGA: - if (FPGA_IS_MSP4200(revision)) { - /* Old-style revision ID */ - mips_machtype = MACH_MSP4200_FPGA; - } else { - mips_machtype = MACH_MSP_OTHER; - } - break; - - case FAMILY_MSP4200: -#if defined(CONFIG_PMC_MSP4200_EVAL) - mips_machtype = MACH_MSP4200_EVAL; -#elif defined(CONFIG_PMC_MSP4200_GW) - mips_machtype = MACH_MSP4200_GW; -#else - mips_machtype = MACH_MSP_OTHER; -#endif - break; - - case FAMILY_MSP4200_FPGA: - mips_machtype = MACH_MSP4200_FPGA; - break; - - case FAMILY_MSP7100: -#if defined(CONFIG_PMC_MSP7120_EVAL) - mips_machtype = MACH_MSP7120_EVAL; -#elif defined(CONFIG_PMC_MSP7120_GW) - mips_machtype = MACH_MSP7120_GW; -#else - mips_machtype = MACH_MSP_OTHER; -#endif - break; - - case FAMILY_MSP7100_FPGA: - mips_machtype = MACH_MSP7120_FPGA; - break; - - default: - /* we don't recognize the machine */ - mips_machtype = MACH_UNKNOWN; - panic("***Bogosity factor five***, exiting"); - break; - } - - prom_init_cmdline(); - - prom_meminit(); - - /* - * Sub-system setup follows. - * Setup functions can either be called here or using the - * subsys_initcall mechanism (i.e. see msp_pci_setup). The - * order in which they are called can be changed by using the - * link order in arch/mips/pmc-sierra/msp71xx/Makefile. - * - * NOTE: Please keep sub-system specific initialization code - * in separate specific files. - */ - msp_serial_setup(); - - register_vsmp_smp_ops(); -} diff --git a/arch/mips/pmcs-msp71xx/msp_smp.c b/arch/mips/pmcs-msp71xx/msp_smp.c deleted file mode 100644 index 00092e2924ec..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_smp.c +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2000, 2001, 2004 MIPS Technologies, Inc. - * Copyright (C) 2001 Ralf Baechle - * Copyright (C) 2010 PMC-Sierra, Inc. - * - * VSMP support for MSP platforms . Derived from malta vsmp support. 
- */ -#include <linux/smp.h> -#include <linux/interrupt.h> - -#include <asm/setup.h> - -#ifdef CONFIG_MIPS_MT_SMP -#define MIPS_CPU_IPI_RESCHED_IRQ 0 /* SW int 0 for resched */ -#define MIPS_CPU_IPI_CALL_IRQ 1 /* SW int 1 for call */ - - -static void ipi_resched_dispatch(void) -{ - do_IRQ(MIPS_CPU_IPI_RESCHED_IRQ); -} - -static void ipi_call_dispatch(void) -{ - do_IRQ(MIPS_CPU_IPI_CALL_IRQ); -} - -static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) -{ - return IRQ_HANDLED; -} - -static irqreturn_t ipi_call_interrupt(int irq, void *dev_id) -{ - generic_smp_call_function_interrupt(); - - return IRQ_HANDLED; -} - -void __init arch_init_ipiirq(int irq, const char *name, irq_handler_t handler) -{ - if (request_irq(irq, handler, IRQF_PERCPU, name, NULL)) - pr_err("Failed to request irq %d (%s)\n", irq, name); - irq_set_handler(irq, handle_percpu_irq); -} - -void __init msp_vsmp_int_init(void) -{ - set_vi_handler(MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch); - set_vi_handler(MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch); - arch_init_ipiirq(MIPS_CPU_IPI_RESCHED_IRQ, "IPI_resched", - ipi_resched_interrupt); - arch_init_ipiirq(MIPS_CPU_IPI_CALL_IRQ, "IPI_call", ipi_call_interrupt); -} -#endif /* CONFIG_MIPS_MT_SMP */ diff --git a/arch/mips/pmcs-msp71xx/msp_time.c b/arch/mips/pmcs-msp71xx/msp_time.c deleted file mode 100644 index 9c629829f447..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_time.c +++ /dev/null @@ -1,90 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Setting up the clock on MSP SOCs. No RTC typically. - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * ######################################################################## - * - * ######################################################################## - */ - -#include <linux/init.h> -#include <linux/kernel_stat.h> -#include <linux/sched.h> -#include <linux/spinlock.h> -#include <linux/ptrace.h> - -#include <asm/cevt-r4k.h> -#include <asm/mipsregs.h> -#include <asm/time.h> - -#include <msp_prom.h> -#include <msp_int.h> -#include <msp_regs.h> - -#define get_current_vpe() \ - ((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE) - -static int tim_installed; - -void __init plat_time_init(void) -{ - char *endp, *s; - unsigned long cpu_rate = 0; - - if (cpu_rate == 0) { - s = prom_getenv("clkfreqhz"); - cpu_rate = simple_strtoul(s, &endp, 10); - if (endp != NULL && *endp != 0) { - printk(KERN_ERR - "Clock rate in Hz parse error: %s\n", s); - cpu_rate = 0; - } - } - - if (cpu_rate == 0) { - s = prom_getenv("clkfreq"); - cpu_rate = 1000 * simple_strtoul(s, &endp, 10); - if (endp != NULL && *endp != 0) { - printk(KERN_ERR - "Clock rate in MHz parse error: %s\n", s); - cpu_rate = 0; - } - } - - if (cpu_rate == 0) { -#if defined(CONFIG_PMC_MSP7120_EVAL) \ - || defined(CONFIG_PMC_MSP7120_GW) - cpu_rate = 400000000; -#elif defined(CONFIG_PMC_MSP7120_FPGA) - cpu_rate = 25000000; -#else - cpu_rate = 150000000; -#endif - printk(KERN_ERR - "Failed to determine CPU clock rate, " - "assuming %ld hz ...\n", cpu_rate); - } - - printk(KERN_WARNING "Clock rate set to %ld\n", cpu_rate); - - /* timer frequency is 1/2 clock rate */ - mips_hpt_frequency = cpu_rate/2; -} - -unsigned int get_c0_compare_int(void) -{ - unsigned long flags = IRQF_PERCPU | IRQF_TIMER | IRQF_SHARED; - - /* MIPS_MT modes may want timer for second VPE */ - if ((get_current_vpe()) && !tim_installed) { - if (request_irq(MSP_INT_VPE1_TIMER, c0_compare_interrupt, flags, - "timer", 
c0_compare_interrupt)) - pr_err("Failed to register timer interrupt\n"); - tim_installed++; - } - - return get_current_vpe() ? MSP_INT_VPE1_TIMER : MSP_INT_VPE0_TIMER; -} diff --git a/arch/mips/pmcs-msp71xx/msp_usb.c b/arch/mips/pmcs-msp71xx/msp_usb.c deleted file mode 100644 index d38ac70b5a2e..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_usb.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * The setup file for USB related hardware on PMC-Sierra MSP processors. - * - * Copyright 2006 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#if defined(CONFIG_USB_EHCI_HCD) || defined(CONFIG_USB_GADGET) - -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/platform_device.h> - -#include <asm/mipsregs.h> - -#include <msp_regs.h> -#include <msp_int.h> -#include <msp_prom.h> -#include <msp_usb.h> - - -#if defined(CONFIG_USB_EHCI_HCD) -static struct resource msp_usbhost0_resources[] = { - [0] = { /* EHCI-HS operational and capabilities registers */ - .start = MSP_USB0_HS_START, - .end = MSP_USB0_HS_END, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = MSP_INT_USB, - .end = MSP_INT_USB, - .flags = IORESOURCE_IRQ, - }, - [2] = { /* MSBus-to-AMBA bridge register space */ - .start = MSP_USB0_MAB_START, - .end = MSP_USB0_MAB_END, - .flags = IORESOURCE_MEM, - }, - [3] = { /* Identification and general hardware parameters */ - .start = MSP_USB0_ID_START, - .end = MSP_USB0_ID_END, - .flags = IORESOURCE_MEM, - }, -}; - -static u64 msp_usbhost0_dma_mask = 0xffffffffUL; - -static struct mspusb_device msp_usbhost0_device = { - .dev = { - .name = "pmcmsp-ehci", - .id = 0, - .dev = { - .dma_mask = &msp_usbhost0_dma_mask, - .coherent_dma_mask = 0xffffffffUL, - }, - .num_resources = ARRAY_SIZE(msp_usbhost0_resources), - .resource = msp_usbhost0_resources, - }, -}; -#endif /* CONFIG_USB_EHCI_HCD */ - -#if defined(CONFIG_USB_GADGET) -static struct resource msp_usbdev0_resources[] = { - [0] = { /* EHCI-HS operational and capabilities registers */ - .start = MSP_USB0_HS_START, - .end = MSP_USB0_HS_END, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = MSP_INT_USB, - .end = MSP_INT_USB, - .flags = IORESOURCE_IRQ, - }, - [2] = { /* MSBus-to-AMBA bridge register space */ - .start = MSP_USB0_MAB_START, - .end = MSP_USB0_MAB_END, - .flags = IORESOURCE_MEM, - }, - [3] = { /* Identification and general hardware parameters */ - .start = MSP_USB0_ID_START, - .end = 
MSP_USB0_ID_END, - .flags = IORESOURCE_MEM, - }, -}; - -static u64 msp_usbdev_dma_mask = 0xffffffffUL; - -/* This may need to be converted to a mspusb_device, too. */ -static struct mspusb_device msp_usbdev0_device = { - .dev = { - .name = "msp71xx_udc", - .id = 0, - .dev = { - .dma_mask = &msp_usbdev_dma_mask, - .coherent_dma_mask = 0xffffffffUL, - }, - .num_resources = ARRAY_SIZE(msp_usbdev0_resources), - .resource = msp_usbdev0_resources, - }, -}; -#endif /* CONFIG_USB_GADGET */ - -static int __init msp_usb_setup(void) -{ - char *strp; - char envstr[32]; - struct platform_device *msp_devs[NUM_USB_DEVS]; - unsigned int val; - - /* construct environment name usbmode */ - /* set usbmode <host/device> as pmon environment var */ - /* - * Could this perhaps be integrated into the "features" env var? - * Use the features key "U", and follow with "H" for host-mode, - * "D" for device-mode. If it works for Ethernet, why not USB... - * -- hammtrev, 2007/03/22 - */ - snprintf(&envstr[0], sizeof(envstr), "usbmode"); - - /* set default host mode */ - val = 1; - - /* get environment string */ - strp = prom_getenv(&envstr[0]); - if (strp) { - /* compare string */ - if (!strcmp(strp, "device")) - val = 0; - } - - if (val) { -#if defined(CONFIG_USB_EHCI_HCD) - msp_devs[0] = &msp_usbhost0_device.dev; - ppfinit("platform add USB HOST done %s.\n", msp_devs[0]->name); -#else - ppfinit("%s: echi_hcd not supported\n", __FILE__); -#endif /* CONFIG_USB_EHCI_HCD */ - } else { -#if defined(CONFIG_USB_GADGET) - /* get device mode structure */ - msp_devs[0] = &msp_usbdev0_device.dev; - ppfinit("platform add USB DEVICE done %s.\n" - , msp_devs[0]->name); -#else - ppfinit("%s: usb_gadget not supported\n", __FILE__); -#endif /* CONFIG_USB_GADGET */ - } - /* add device */ - platform_add_devices(msp_devs, ARRAY_SIZE(msp_devs)); - - return 0; -} - -subsys_initcall(msp_usb_setup); -#endif /* CONFIG_USB_EHCI_HCD || CONFIG_USB_GADGET */ diff --git a/arch/mips/pnx833x/Platform b/arch/mips/pnx833x/Platform index 287260669551..e5286a49fc3e 100644 --- a/arch/mips/pnx833x/Platform +++ b/arch/mips/pnx833x/Platform @@ -1,5 +1,4 @@ # NXP STB225 -platform-$(CONFIG_SOC_PNX833X) += pnx833x/ cflags-$(CONFIG_SOC_PNX833X) += -I$(srctree)/arch/mips/include/asm/mach-pnx833x load-$(CONFIG_NXP_STB220) += 0xffffffff80001000 load-$(CONFIG_NXP_STB225) += 0xffffffff80001000 diff --git a/arch/mips/ralink/Platform b/arch/mips/ralink/Platform index 6095fcc334f4..02ee0791481d 100644 --- a/arch/mips/ralink/Platform +++ b/arch/mips/ralink/Platform @@ -1,7 +1,6 @@ # # Ralink SoC common stuff # -core-$(CONFIG_RALINK) += arch/mips/ralink/ cflags-$(CONFIG_RALINK) += -I$(srctree)/arch/mips/include/asm/mach-ralink # diff --git a/arch/mips/ralink/bootrom.c b/arch/mips/ralink/bootrom.c index 88bcce59beeb..94ca8379b83c 100644 --- a/arch/mips/ralink/bootrom.c +++ b/arch/mips/ralink/bootrom.c @@ -31,7 +31,7 @@ static const struct file_operations bootrom_file_ops = { .release = single_release, }; -static int bootrom_setup(void) +static int __init bootrom_setup(void) { debugfs_create_file("bootrom", 0444, NULL, NULL, &bootrom_file_ops); return 0; diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c index 0accb80db709..ca0ac607b0f3 100644 --- a/arch/mips/ralink/mt7621.c +++ b/arch/mips/ralink/mt7621.c @@ -20,11 +20,6 @@ #include "common.h" -#define SYSC_REG_SYSCFG 0x10 -#define SYSC_REG_CPLL_CLKCFG0 0x2c -#define SYSC_REG_CUR_CLK_STS 0x44 -#define CPU_CLK_SEL (BIT(30) | BIT(31)) - #define MT7621_GPIO_MODE_UART1 1 #define MT7621_GPIO_MODE_I2C 2 #define 
MT7621_GPIO_MODE_UART3_MASK 0x3 @@ -115,44 +110,6 @@ phys_addr_t mips_cpc_default_phys_base(void) panic("Cannot detect cpc address"); } -void __init ralink_clk_init(void) -{ - int cpu_fdiv = 0; - int cpu_ffrac = 0; - int fbdiv = 0; - u32 clk_sts, syscfg; - u8 clk_sel = 0, xtal_mode; - u32 cpu_clk; - - if ((rt_sysc_r32(SYSC_REG_CPLL_CLKCFG0) & CPU_CLK_SEL) != 0) - clk_sel = 1; - - switch (clk_sel) { - case 0: - clk_sts = rt_sysc_r32(SYSC_REG_CUR_CLK_STS); - cpu_fdiv = ((clk_sts >> 8) & 0x1F); - cpu_ffrac = (clk_sts & 0x1F); - cpu_clk = (500 * cpu_ffrac / cpu_fdiv) * 1000 * 1000; - break; - - case 1: - fbdiv = ((rt_sysc_r32(0x648) >> 4) & 0x7F) + 1; - syscfg = rt_sysc_r32(SYSC_REG_SYSCFG); - xtal_mode = (syscfg >> 6) & 0x7; - if (xtal_mode >= 6) { - /* 25Mhz Xtal */ - cpu_clk = 25 * fbdiv * 1000 * 1000; - } else if (xtal_mode >= 3) { - /* 40Mhz Xtal */ - cpu_clk = 40 * fbdiv * 1000 * 1000; - } else { - /* 20Mhz Xtal */ - cpu_clk = 20 * fbdiv * 1000 * 1000; - } - break; - } -} - void __init ralink_of_remap(void) { rt_sysc_membase = plat_of_remap_node("mtk,mt7621-sysc"); diff --git a/arch/mips/rb532/Platform b/arch/mips/rb532/Platform index aeec45a7cbb3..12eaa8790b3e 100644 --- a/arch/mips/rb532/Platform +++ b/arch/mips/rb532/Platform @@ -1,7 +1,6 @@ # # Routerboard 532 # -platform-$(CONFIG_MIKROTIK_RB532) += rb532/ cflags-$(CONFIG_MIKROTIK_RB532) += \ -I$(srctree)/arch/mips/include/asm/mach-rc32434 load-$(CONFIG_MIKROTIK_RB532) += 0xffffffff80101000 diff --git a/arch/mips/sgi-ip22/Platform b/arch/mips/sgi-ip22/Platform index e8f6b3a42a48..62fa30bb959e 100644 --- a/arch/mips/sgi-ip22/Platform +++ b/arch/mips/sgi-ip22/Platform @@ -7,7 +7,6 @@ # current variable will break so for 64-bit kernels we have to raise the start # address by 8kb. # -platform-$(CONFIG_SGI_IP22) += sgi-ip22/ cflags-$(CONFIG_SGI_IP22) += -I$(srctree)/arch/mips/include/asm/mach-ip22 ifdef CONFIG_32BIT load-$(CONFIG_SGI_IP22) += 0xffffffff88002000 @@ -29,6 +28,5 @@ ifdef CONFIG_SGI_IP28 $(error gcc doesn't support needed option -mr10k-cache-barrier=store) endif endif -platform-$(CONFIG_SGI_IP28) += sgi-ip22/ cflags-$(CONFIG_SGI_IP28) += -mr10k-cache-barrier=store -I$(srctree)/arch/mips/include/asm/mach-ip28 load-$(CONFIG_SGI_IP28) += 0xa800000020004000 diff --git a/arch/mips/sgi-ip27/Platform b/arch/mips/sgi-ip27/Platform index 1fb9c2ea7c8f..e734ee6abd44 100644 --- a/arch/mips/sgi-ip27/Platform +++ b/arch/mips/sgi-ip27/Platform @@ -5,8 +5,6 @@ # symmon, 0xc00000000001c000 for production kernels. Note that the value must # be 16kb aligned or the handling of the current variable will break. # -ifdef CONFIG_SGI_IP27 -platform-$(CONFIG_SGI_IP27) += sgi-ip27/ cflags-$(CONFIG_SGI_IP27) += -I$(srctree)/arch/mips/include/asm/mach-ip27 ifdef CONFIG_MAPPED_KERNEL load-$(CONFIG_SGI_IP27) += 0xc00000004001c000 @@ -16,4 +14,3 @@ else load-$(CONFIG_SGI_IP27) += 0xa80000000001c000 OBJCOPYFLAGS := --change-addresses=0x57ffffff80000000 endif -endif diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index 61f3565f3645..c0e33632bc37 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -21,7 +21,6 @@ #include <asm/time.h> #include <asm/pgtable.h> #include <asm/sgialib.h> -#include <asm/sn/ioc3.h> #include <asm/sn/klconfig.h> #include <asm/sn/arch.h> #include <asm/sn/addrs.h> @@ -29,14 +28,6 @@ #include "ip27-common.h" -#define TICK_SIZE (tick_nsec / 1000) - -/* Includes for ioc3_init(). 
*/ -#include <asm/sn/types.h> -#include <asm/pci/bridge.h> - -#include "ip27-common.h" - static int rt_next_event(unsigned long delta, struct clock_event_device *evt) { unsigned int cpu = smp_processor_id(); diff --git a/arch/mips/sgi-ip30/Platform b/arch/mips/sgi-ip30/Platform index 2b5695c2049a..f6f11517e091 100644 --- a/arch/mips/sgi-ip30/Platform +++ b/arch/mips/sgi-ip30/Platform @@ -1,8 +1,5 @@ # # SGI-IP30 (Octane/Octane2) # -ifdef CONFIG_SGI_IP30 -platform-$(CONFIG_SGI_IP30) += sgi-ip30/ cflags-$(CONFIG_SGI_IP30) += -I$(srctree)/arch/mips/include/asm/mach-ip30 load-$(CONFIG_SGI_IP30) += 0xa800000020004000 -endif diff --git a/arch/mips/sgi-ip32/Platform b/arch/mips/sgi-ip32/Platform index 0fea556f3641..f58a7a02b4ca 100644 --- a/arch/mips/sgi-ip32/Platform +++ b/arch/mips/sgi-ip32/Platform @@ -6,6 +6,5 @@ # a multiple of the kernel stack size or the handling of the current variable # will break. # -platform-$(CONFIG_SGI_IP32) += sgi-ip32/ cflags-$(CONFIG_SGI_IP32) += -I$(srctree)/arch/mips/include/asm/mach-ip32 load-$(CONFIG_SGI_IP32) += 0xffffffff80004000 diff --git a/arch/mips/sibyte/Platform b/arch/mips/sibyte/Platform index af117330ce14..65b2225b76b2 100644 --- a/arch/mips/sibyte/Platform +++ b/arch/mips/sibyte/Platform @@ -1,10 +1,6 @@ # # These are all rather similar so we consider them a single platform # -platform-$(CONFIG_SIBYTE_BCM112X) += sibyte/ -platform-$(CONFIG_SIBYTE_SB1250) += sibyte/ -platform-$(CONFIG_SIBYTE_BCM1x55) += sibyte/ -platform-$(CONFIG_SIBYTE_BCM1x80) += sibyte/ # # Sibyte SB1250 / BCM1480 family of SOCs diff --git a/arch/mips/sni/Platform b/arch/mips/sni/Platform index 2644a9d63c0f..b0b3dde0bef8 100644 --- a/arch/mips/sni/Platform +++ b/arch/mips/sni/Platform @@ -1,7 +1,6 @@ # # SNI RM # -platform-$(CONFIG_SNI_RM) += sni/ cflags-$(CONFIG_SNI_RM) += -I$(srctree)/arch/mips/include/asm/mach-rm ifdef CONFIG_CPU_LITTLE_ENDIAN load-$(CONFIG_SNI_RM) += 0xffffffff80600000 diff --git a/arch/mips/tools/elf-entry.c b/arch/mips/tools/elf-entry.c index adde79ce7fc0..dbd14ff05b4c 100644 --- a/arch/mips/tools/elf-entry.c +++ b/arch/mips/tools/elf-entry.c @@ -51,11 +51,14 @@ int main(int argc, const char *argv[]) nread = fread(&hdr, 1, sizeof(hdr), file); if (nread != sizeof(hdr)) { perror("Unable to read input file"); + fclose(file); return EXIT_FAILURE; } - if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG)) + if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG)) { + fclose(file); die("Input is not an ELF\n"); + } switch (hdr.ehdr32.e_ident[EI_CLASS]) { case ELFCLASS32: @@ -67,6 +70,7 @@ int main(int argc, const char *argv[]) entry = be32toh(hdr.ehdr32.e_entry); break; default: + fclose(file); die("Invalid ELF encoding\n"); } @@ -83,14 +87,17 @@ int main(int argc, const char *argv[]) entry = be64toh(hdr.ehdr64.e_entry); break; default: + fclose(file); die("Invalid ELF encoding\n"); } break; default: + fclose(file); die("Invalid ELF class\n"); } printf("0x%016" PRIx64 "\n", entry); + fclose(file); return EXIT_SUCCESS; } diff --git a/arch/mips/tools/loongson3-llsc-check.c b/arch/mips/tools/loongson3-llsc-check.c index 0ebddd0ae46f..bdbc7b4324ec 100644 --- a/arch/mips/tools/loongson3-llsc-check.c +++ b/arch/mips/tools/loongson3-llsc-check.c @@ -303,5 +303,7 @@ out_munmap: out_close: close(vmlinux_fd); out_ret: + fprintf(stdout, "loongson3-llsc-check returns %s\n", + status ? 
"failure" : "success"); return status; } diff --git a/arch/mips/txx9/Kconfig b/arch/mips/txx9/Kconfig index 9a22a182b7a4..85c4c121c71f 100644 --- a/arch/mips/txx9/Kconfig +++ b/arch/mips/txx9/Kconfig @@ -58,7 +58,7 @@ config TOSHIBA_RBTX4939 config SOC_TX3927 bool select CEVT_TXX9 - select HAS_TXX9_SERIAL + imply HAS_TXX9_SERIAL select HAVE_PCI select IRQ_TXX9 select GPIO_TXX9 @@ -66,30 +66,30 @@ config SOC_TX3927 config SOC_TX4927 bool select CEVT_TXX9 - select HAS_TXX9_SERIAL + imply HAS_TXX9_SERIAL select HAVE_PCI select IRQ_TXX9 select PCI_TX4927 select GPIO_TXX9 - select HAS_TXX9_ACLC + imply HAS_TXX9_ACLC config SOC_TX4938 bool select CEVT_TXX9 - select HAS_TXX9_SERIAL + imply HAS_TXX9_SERIAL select HAVE_PCI select IRQ_TXX9 select PCI_TX4927 select GPIO_TXX9 - select HAS_TXX9_ACLC + imply HAS_TXX9_ACLC config SOC_TX4939 bool select CEVT_TXX9 - select HAS_TXX9_SERIAL + imply HAS_TXX9_SERIAL select HAVE_PCI select PCI_TX4927 - select HAS_TXX9_ACLC + imply HAS_TXX9_ACLC config TXX9_7SEGLED bool diff --git a/arch/mips/txx9/Platform b/arch/mips/txx9/Platform index a176d1fd5799..7f4429ba22eb 100644 --- a/arch/mips/txx9/Platform +++ b/arch/mips/txx9/Platform @@ -1,6 +1,3 @@ -platform-$(CONFIG_MACH_TX39XX) += txx9/ -platform-$(CONFIG_MACH_TX49XX) += txx9/ - cflags-$(CONFIG_MACH_TX39XX) += \ -I$(srctree)/arch/mips/include/asm/mach-tx39xx cflags-$(CONFIG_MACH_TX49XX) += \ diff --git a/arch/mips/vdso/Kconfig b/arch/mips/vdso/Kconfig new file mode 100644 index 000000000000..7aec721398d5 --- /dev/null +++ b/arch/mips/vdso/Kconfig @@ -0,0 +1,18 @@ +# For the pre-R6 code in arch/mips/vdso/vdso.h for locating +# the base address of VDSO, the linker will emit a R_MIPS_PC32 +# relocation in binutils > 2.25 but it will fail with older versions +# because that relocation is not supported for that symbol. As a result +# of which we are forced to disable the VDSO symbols when building +# with < 2.25 binutils on pre-R6 kernels. For more references on why we +# can't use other methods to get the base address of VDSO please refer to +# the comments on that file. +# +# GCC (at least up to version 9.2) appears to emit function calls that make use +# of the GOT when targeting microMIPS, which we can't use in the VDSO due to +# the lack of relocations. As such, we disable the VDSO for microMIPS builds. + +config MIPS_LD_CAN_LINK_VDSO + def_bool LD_VERSION >= 225000000 || LD_IS_LLD + +config MIPS_DISABLE_VDSO + def_bool CPU_MICROMIPS || (!CPU_MIPSR6 && !MIPS_LD_CAN_LINK_VDSO) diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index d7fe8408603e..2e64c7600eea 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -52,44 +52,17 @@ endif CFLAGS_REMOVE_vgettimeofday.o = -pg -DISABLE_VDSO := n - -# -# For the pre-R6 code in arch/mips/vdso/vdso.h for locating -# the base address of VDSO, the linker will emit a R_MIPS_PC32 -# relocation in binutils > 2.25 but it will fail with older versions -# because that relocation is not supported for that symbol. As a result -# of which we are forced to disable the VDSO symbols when building -# with < 2.25 binutils on pre-R6 kernels. For more references on why we -# can't use other methods to get the base address of VDSO please refer to -# the comments on that file. 
-# -ifndef CONFIG_CPU_MIPSR6 - ifeq ($(call ld-ifversion, -lt, 225000000, y),y) +ifdef CONFIG_MIPS_DISABLE_VDSO + ifndef CONFIG_MIPS_LD_CAN_LINK_VDSO $(warning MIPS VDSO requires binutils >= 2.25) - DISABLE_VDSO := y endif -endif - -# -# GCC (at least up to version 9.2) appears to emit function calls that make use -# of the GOT when targeting microMIPS, which we can't use in the VDSO due to -# the lack of relocations. As such, we disable the VDSO for microMIPS builds. -# -ifdef CONFIG_CPU_MICROMIPS - DISABLE_VDSO := y -endif - -ifeq ($(DISABLE_VDSO),y) obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y)) - ccflags-vdso += -DDISABLE_MIPS_VDSO endif # VDSO linker flags. -VDSO_LDFLAGS := \ - -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 \ - $(addprefix -Wl$(comma),$(filter -E%,$(KBUILD_CFLAGS))) \ - -nostdlib -shared -Wl,--hash-style=sysv -Wl,--build-id +ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ + $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \ + -G 0 --eh-frame-hdr --hash-style=sysv --build-id -T CFLAGS_REMOVE_vdso.o = -pg @@ -108,11 +81,7 @@ quiet_cmd_vdso_mips_check = VDSOCHK $@ # quiet_cmd_vdsold_and_vdso_check = LD $@ - cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check); $(cmd_vdso_mips_check) - -quiet_cmd_vdsold = VDSO $@ - cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \ - -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ + cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check); $(cmd_vdso_mips_check) quiet_cmd_vdsoas_o_S = AS $@ cmd_vdsoas_o_S = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/mips/vdso/vdso.lds.S b/arch/mips/vdso/vdso.lds.S index da4627430aba..d90b65724d78 100644 --- a/arch/mips/vdso/vdso.lds.S +++ b/arch/mips/vdso/vdso.lds.S @@ -91,7 +91,7 @@ PHDRS VERSION { LINUX_2.6 { -#ifndef DISABLE_MIPS_VDSO +#ifndef CONFIG_MIPS_DISABLE_VDSO global: __vdso_clock_gettime; __vdso_gettimeofday; diff --git a/arch/mips/vr41xx/Makefile b/arch/mips/vr41xx/Makefile new file mode 100644 index 000000000000..765020d5ee4d --- /dev/null +++ b/arch/mips/vr41xx/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +# +obj-$(CONFIG_MACH_VR41XX) += common/ +obj-$(CONFIG_CASIO_E55) += casio-e55/ +obj-$(CONFIG_IBM_WORKPAD) += ibm-workpad/ diff --git a/arch/mips/vr41xx/Platform b/arch/mips/vr41xx/Platform index b6c8d5c08ddb..3f593a3e5678 100644 --- a/arch/mips/vr41xx/Platform +++ b/arch/mips/vr41xx/Platform @@ -1,19 +1,16 @@ # # NEC VR4100 series based machines # -platform-$(CONFIG_MACH_VR41XX) += vr41xx/common/ cflags-$(CONFIG_MACH_VR41XX) += -I$(srctree)/arch/mips/include/asm/mach-vr41xx # # CASIO CASSIPEIA E-55/65 (VR4111) # -platform-$(CONFIG_CASIO_E55) += vr41xx/casio-e55/ load-$(CONFIG_CASIO_E55) += 0xffffffff80004000 # # IBM WorkPad z50 (VR4121) # -platform-$(CONFIG_IBM_WORKPAD) += vr41xx/ibm-workpad/ load-$(CONFIG_IBM_WORKPAD) += 0xffffffff80004000 # diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 3801a2ef9bca..92128f9164ce 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -68,7 +68,7 @@ config PARISC The PA-RISC microprocessor is designed by Hewlett-Packard and used in many of their workstations & servers (HP9000 700 and 800 series, and later HP3000 series). The PA-RISC Linux project home page is - at <http://www.parisc-linux.org/>. + at <https://parisc.wiki.kernel.org>. 
config CPU_BIG_ENDIAN def_bool y diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 628cd8bb7ad8..fadbbd010337 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -21,8 +21,6 @@ KBUILD_IMAGE := vmlinuz NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 -LIBGCC = $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) -export LIBGCC ifdef CONFIG_64BIT UTS_MACHINE := parisc64 @@ -110,6 +108,8 @@ cflags-$(CONFIG_PA8X00) += -march=2.0 -mschedule=8000 head-y := arch/parisc/kernel/head.o KBUILD_CFLAGS += $(cflags-y) +LIBGCC := $(shell $(CC) -print-libgcc-file-name) +export LIBGCC kernel-y := mm/ kernel/ math-emu/ diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile index 1e5879c6a752..dff453687530 100644 --- a/arch/parisc/boot/compressed/Makefile +++ b/arch/parisc/boot/compressed/Makefile @@ -16,6 +16,7 @@ targets += real2.S firmware.c KBUILD_CFLAGS := -D__KERNEL__ -O2 -DBOOTLOADER KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS += -fno-strict-aliasing KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -fno-builtin-printf KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os ifndef CONFIG_64BIT diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 1d976f2ebff0..665b70086685 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -4,7 +4,8 @@ * * PDC == Processor Dependent Code * - * See http://www.parisc-linux.org/documentation/index.html + * See PDC documentation at + * https://parisc.wiki.kernel.org/index.php/Technical_Documentation * for documentation describing the entry points and calling * conventions defined below. * diff --git a/arch/parisc/kernel/hardware.c b/arch/parisc/kernel/hardware.c index 98c5203c1ab0..17161e72ea29 100644 --- a/arch/parisc/kernel/hardware.c +++ b/arch/parisc/kernel/hardware.c @@ -6,7 +6,8 @@ * * Based on the document "PA-RISC 1.1 I/O Firmware Architecture * Reference Specification", March 7, 1999, version 0.96. This - * is available at http://parisc-linux.org/documentation/ + * is available at + * https://parisc.wiki.kernel.org/index.php/Technical_Documentation * * Copyright 1999 by Alex deVries <alex@onefishtwo.ca> * and copyright 1999 The Puffin Group Inc. diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 1c50093e2ebe..fac18c623d16 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -3,9 +3,9 @@ * * The best reference for this stuff is probably the Processor- * Specific ELF Supplement for PA-RISC: - * http://ftp.parisc-linux.org/docs/arch/elf-pa-hp.pdf + * https://parisc.wiki.kernel.org/index.php/File:Elf-pa-hp.pdf * - * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * Linux/PA-RISC Project * Copyright (C) 2003 Randolph Chung <tausq at debian . 
org> * Copyright (C) 2008 Helge Deller <deller@gmx.de> * diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 506e4df2d730..6e5d85ba588d 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -78,7 +78,7 @@ struct kvmppc_vcore { struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS]; struct list_head preempt_list; spinlock_t lock; - struct swait_queue_head wq; + struct rcuwait wait; spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ u64 stolen_tb; u64 preempt_tb; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1dc63101ffe1..337047ba4a56 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -751,7 +751,7 @@ struct kvm_vcpu_arch { u8 irq_pending; /* Used by XIVE to signal pending guest irqs */ u32 last_inst; - struct swait_queue_head *wqp; + struct rcuwait *waitp; struct kvmppc_vcore *vcore; int ret; int trap; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 5690a1f9b976..37508a356f28 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -36,41 +36,38 @@ #include "book3s.h" #include "trace.h" -#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ -#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ - /* #define EXIT_DEBUG */ struct kvm_stats_debugfs_item debugfs_entries[] = { - { "exits", VCPU_STAT(sum_exits) }, - { "mmio", VCPU_STAT(mmio_exits) }, - { "sig", VCPU_STAT(signal_exits) }, - { "sysc", VCPU_STAT(syscall_exits) }, - { "inst_emu", VCPU_STAT(emulated_inst_exits) }, - { "dec", VCPU_STAT(dec_exits) }, - { "ext_intr", VCPU_STAT(ext_intr_exits) }, - { "queue_intr", VCPU_STAT(queue_intr) }, - { "halt_poll_success_ns", VCPU_STAT(halt_poll_success_ns) }, - { "halt_poll_fail_ns", VCPU_STAT(halt_poll_fail_ns) }, - { "halt_wait_ns", VCPU_STAT(halt_wait_ns) }, - { "halt_successful_poll", VCPU_STAT(halt_successful_poll), }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), }, - { "halt_successful_wait", VCPU_STAT(halt_successful_wait) }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, - { "pf_storage", VCPU_STAT(pf_storage) }, - { "sp_storage", VCPU_STAT(sp_storage) }, - { "pf_instruc", VCPU_STAT(pf_instruc) }, - { "sp_instruc", VCPU_STAT(sp_instruc) }, - { "ld", VCPU_STAT(ld) }, - { "ld_slow", VCPU_STAT(ld_slow) }, - { "st", VCPU_STAT(st) }, - { "st_slow", VCPU_STAT(st_slow) }, - { "pthru_all", VCPU_STAT(pthru_all) }, - { "pthru_host", VCPU_STAT(pthru_host) }, - { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, - { "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) }, - { "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) }, + VCPU_STAT("exits", sum_exits), + VCPU_STAT("mmio", mmio_exits), + VCPU_STAT("sig", signal_exits), + VCPU_STAT("sysc", syscall_exits), + VCPU_STAT("inst_emu", emulated_inst_exits), + VCPU_STAT("dec", dec_exits), + VCPU_STAT("ext_intr", ext_intr_exits), + VCPU_STAT("queue_intr", queue_intr), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VCPU_STAT("halt_wait_ns", halt_wait_ns), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_successful_wait", halt_successful_wait), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_wakeup", halt_wakeup), + 
VCPU_STAT("pf_storage", pf_storage), + VCPU_STAT("sp_storage", sp_storage), + VCPU_STAT("pf_instruc", pf_instruc), + VCPU_STAT("sp_instruc", sp_instruc), + VCPU_STAT("ld", ld), + VCPU_STAT("ld_slow", ld_slow), + VCPU_STAT("st", st), + VCPU_STAT("st_slow", st_slow), + VCPU_STAT("pthru_all", pthru_all), + VCPU_STAT("pthru_host", pthru_host), + VCPU_STAT("pthru_bad_aff", pthru_bad_aff), + VM_STAT("largepages_2M", num_2M_pages, .mode = 0444), + VM_STAT("largepages_1G", num_1G_pages, .mode = 0444), { NULL } }; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 93493f0cbfe8..7f59c47a5b9d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -230,13 +230,11 @@ static bool kvmppc_ipi_thread(int cpu) static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) { int cpu; - struct swait_queue_head *wqp; + struct rcuwait *waitp; - wqp = kvm_arch_vcpu_wq(vcpu); - if (swq_has_sleeper(wqp)) { - swake_up_one(wqp); + waitp = kvm_arch_vcpu_get_wait(vcpu); + if (rcuwait_wake_up(waitp)) ++vcpu->stat.halt_wakeup; - } cpu = READ_ONCE(vcpu->arch.thread_cpu); if (cpu >= 0 && kvmppc_ipi_thread(cpu)) @@ -2125,7 +2123,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id) spin_lock_init(&vcore->lock); spin_lock_init(&vcore->stoltb_lock); - init_swait_queue_head(&vcore->wq); + rcuwait_init(&vcore->wait); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; vcore->first_vcpuid = id; @@ -3784,7 +3782,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) ktime_t cur, start_poll, start_wait; int do_sleep = 1; u64 block_ns; - DECLARE_SWAITQUEUE(wait); /* Poll for pending exceptions and ceded state */ cur = start_poll = ktime_get(); @@ -3812,10 +3809,10 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) } } - prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE); - + prepare_to_rcuwait(&vc->wait); + set_current_state(TASK_INTERRUPTIBLE); if (kvmppc_vcore_check_block(vc)) { - finish_swait(&vc->wq, &wait); + finish_rcuwait(&vc->wait); do_sleep = 0; /* If we polled, count this as a successful poll */ if (vc->halt_poll_ns) @@ -3829,7 +3826,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); schedule(); - finish_swait(&vc->wq, &wait); + finish_rcuwait(&vc->wait); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; trace_kvmppc_vcore_blocked(vc, 1); @@ -3940,7 +3937,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_start_thread(vcpu, vc); trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { - swake_up_one(&vc->wq); + rcuwait_wake_up(&vc->wait); } } @@ -4279,7 +4276,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } user_vrsave = mfspr(SPRN_VRSAVE); - vcpu->arch.wqp = &vcpu->arch.vcore->wq; + vcpu->arch.waitp = &vcpu->arch.vcore->wait; vcpu->arch.pgdir = kvm->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 6c18ea88fd25..888afe8d35cc 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -35,29 +35,28 @@ unsigned long kvmppc_booke_handlers; -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU - struct kvm_stats_debugfs_item debugfs_entries[] = { - { "mmio", VCPU_STAT(mmio_exits) }, - { "sig", VCPU_STAT(signal_exits) }, - { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, - { "itlb_v", 
VCPU_STAT(itlb_virt_miss_exits) }, - { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, - { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) }, - { "sysc", VCPU_STAT(syscall_exits) }, - { "isi", VCPU_STAT(isi_exits) }, - { "dsi", VCPU_STAT(dsi_exits) }, - { "inst_emu", VCPU_STAT(emulated_inst_exits) }, - { "dec", VCPU_STAT(dec_exits) }, - { "ext_intr", VCPU_STAT(ext_intr_exits) }, - { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, - { "doorbell", VCPU_STAT(dbell_exits) }, - { "guest doorbell", VCPU_STAT(gdbell_exits) }, - { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, + VCPU_STAT("mmio", mmio_exits), + VCPU_STAT("sig", signal_exits), + VCPU_STAT("itlb_r", itlb_real_miss_exits), + VCPU_STAT("itlb_v", itlb_virt_miss_exits), + VCPU_STAT("dtlb_r", dtlb_real_miss_exits), + VCPU_STAT("dtlb_v", dtlb_virt_miss_exits), + VCPU_STAT("sysc", syscall_exits), + VCPU_STAT("isi", isi_exits), + VCPU_STAT("dsi", dsi_exits), + VCPU_STAT("inst_emu", emulated_inst_exits), + VCPU_STAT("dec", dec_exits), + VCPU_STAT("ext_intr", ext_intr_exits), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("doorbell", dbell_exits), + VCPU_STAT("guest doorbell", gdbell_exits), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VM_STAT("remote_tlb_flush", remote_tlb_flush), { NULL } }; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ad2f172c26a6..27ccff612903 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -752,7 +752,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) if (err) goto out_vcpu_uninit; - vcpu->arch.wqp = &vcpu->wq; + vcpu->arch.waitp = &vcpu->wait; kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id); return 0; @@ -1765,8 +1765,9 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) return r; } -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; int r; vcpu_load(vcpu); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 13e251699346..b2ba3e95bda7 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -167,7 +167,6 @@ static void pnv_smp_cpu_kill_self(void) /* Standard hot unplug procedure */ idle_task_exit(); - current->active_mm = NULL; /* for sanity */ cpu = smp_processor_id(); DBG("CPU%d offline\n", cpu); generic_set_cpu_dead(cpu); diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c index 302934177760..b198eaa74456 100644 --- a/arch/riscv/net/bpf_jit_comp32.c +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -13,8 +13,35 @@ #include <linux/filter.h> #include "bpf_jit.h" +/* + * Stack layout during BPF program execution: + * + * high + * RV32 fp => +----------+ + * | saved ra | + * | saved fp | RV32 callee-saved registers + * | ... | + * +----------+ <= (fp - 4 * NR_SAVED_REGISTERS) + * | hi(R6) | + * | lo(R6) | + * | hi(R7) | JIT scratch space for BPF registers + * | lo(R7) | + * | ... | + * BPF_REG_FP => +----------+ <= (fp - 4 * NR_SAVED_REGISTERS + * | | - 4 * BPF_JIT_SCRATCH_REGS) + * | | + * | ... 
| BPF program stack + * | | + * RV32 sp => +----------+ + * | | + * | ... | Function call stack + * | | + * +----------+ + * low + */ + enum { - /* Stack layout - these are offsets from (top of stack - 4). */ + /* Stack layout - these are offsets from top of JIT scratch space. */ BPF_R6_HI, BPF_R6_LO, BPF_R7_HI, @@ -29,7 +56,11 @@ enum { BPF_JIT_SCRATCH_REGS, }; -#define STACK_OFFSET(k) (-4 - ((k) * 4)) +/* Number of callee-saved registers stored to stack: ra, fp, s1--s7. */ +#define NR_SAVED_REGISTERS 9 + +/* Offset from fp for BPF registers stored on stack. */ +#define STACK_OFFSET(k) (-4 - (4 * NR_SAVED_REGISTERS) - (4 * (k))) #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) @@ -111,11 +142,9 @@ static void emit_imm64(const s8 *rd, s32 imm_hi, s32 imm_lo, static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) { - int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 4; + int stack_adjust = ctx->stack_size; const s8 *r0 = bpf2rv32[BPF_REG_0]; - store_offset -= 4 * BPF_JIT_SCRATCH_REGS; - /* Set return value if not tail call. */ if (!is_tail_call) { emit(rv_addi(RV_REG_A0, lo(r0), 0), ctx); @@ -123,15 +152,15 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) } /* Restore callee-saved registers. */ - emit(rv_lw(RV_REG_RA, store_offset - 0, RV_REG_SP), ctx); - emit(rv_lw(RV_REG_FP, store_offset - 4, RV_REG_SP), ctx); - emit(rv_lw(RV_REG_S1, store_offset - 8, RV_REG_SP), ctx); - emit(rv_lw(RV_REG_S2, store_offset - 12, RV_REG_SP), ctx); - emit(rv_lw(RV_REG_S3, store_offset - 16, RV_REG_SP), ctx); - emit(rv_lw(RV_REG_S4, store_offset - 20, RV_REG_SP), ctx); - emit(rv_lw(RV_REG_S5, store_offset - 24, RV_REG_SP), ctx); - emit(rv_lw(RV_REG_S6, store_offset - 28, RV_REG_SP), ctx); - emit(rv_lw(RV_REG_S7, store_offset - 32, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_RA, stack_adjust - 4, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_FP, stack_adjust - 8, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S1, stack_adjust - 12, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S2, stack_adjust - 16, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S3, stack_adjust - 20, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S4, stack_adjust - 24, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S5, stack_adjust - 28, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S6, stack_adjust - 32, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S7, stack_adjust - 36, RV_REG_SP), ctx); emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); @@ -770,12 +799,13 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx); /* - * if ((temp_tcc = tcc - 1) < 0) + * temp_tcc = tcc - 1; + * if (tcc < 0) * goto out; */ emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx); off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; - emit_bcc(BPF_JSLT, RV_REG_T1, RV_REG_ZERO, off, ctx); + emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx); /* * prog = array->ptrs[index]; @@ -1259,17 +1289,20 @@ notsupported: void bpf_jit_build_prologue(struct rv_jit_context *ctx) { - /* Make space to save 9 registers: ra, fp, s1--s7. */ - int stack_adjust = 9 * sizeof(u32), store_offset, bpf_stack_adjust; const s8 *fp = bpf2rv32[BPF_REG_FP]; const s8 *r1 = bpf2rv32[BPF_REG_1]; - - bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + int stack_adjust = 0; + int bpf_stack_adjust = + round_up(ctx->prog->aux->stack_depth, STACK_ALIGN); + + /* Make space for callee-saved registers. */ + stack_adjust += NR_SAVED_REGISTERS * sizeof(u32); + /* Make space for BPF registers on stack. 
*/ + stack_adjust += BPF_JIT_SCRATCH_REGS * sizeof(u32); + /* Make space for BPF stack. */ stack_adjust += bpf_stack_adjust; - - store_offset = stack_adjust - 4; - - stack_adjust += 4 * BPF_JIT_SCRATCH_REGS; + /* Round up for stack alignment. */ + stack_adjust = round_up(stack_adjust, STACK_ALIGN); /* * The first instruction sets the tail-call-counter (TCC) register. @@ -1280,24 +1313,24 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx) emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx); /* Save callee-save registers. */ - emit(rv_sw(RV_REG_SP, store_offset - 0, RV_REG_RA), ctx); - emit(rv_sw(RV_REG_SP, store_offset - 4, RV_REG_FP), ctx); - emit(rv_sw(RV_REG_SP, store_offset - 8, RV_REG_S1), ctx); - emit(rv_sw(RV_REG_SP, store_offset - 12, RV_REG_S2), ctx); - emit(rv_sw(RV_REG_SP, store_offset - 16, RV_REG_S3), ctx); - emit(rv_sw(RV_REG_SP, store_offset - 20, RV_REG_S4), ctx); - emit(rv_sw(RV_REG_SP, store_offset - 24, RV_REG_S5), ctx); - emit(rv_sw(RV_REG_SP, store_offset - 28, RV_REG_S6), ctx); - emit(rv_sw(RV_REG_SP, store_offset - 32, RV_REG_S7), ctx); + emit(rv_sw(RV_REG_SP, stack_adjust - 4, RV_REG_RA), ctx); + emit(rv_sw(RV_REG_SP, stack_adjust - 8, RV_REG_FP), ctx); + emit(rv_sw(RV_REG_SP, stack_adjust - 12, RV_REG_S1), ctx); + emit(rv_sw(RV_REG_SP, stack_adjust - 16, RV_REG_S2), ctx); + emit(rv_sw(RV_REG_SP, stack_adjust - 20, RV_REG_S3), ctx); + emit(rv_sw(RV_REG_SP, stack_adjust - 24, RV_REG_S4), ctx); + emit(rv_sw(RV_REG_SP, stack_adjust - 28, RV_REG_S5), ctx); + emit(rv_sw(RV_REG_SP, stack_adjust - 32, RV_REG_S6), ctx); + emit(rv_sw(RV_REG_SP, stack_adjust - 36, RV_REG_S7), ctx); /* Set fp: used as the base address for stacked BPF registers. */ emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx); - /* Set up BPF stack pointer. */ + /* Set up BPF frame pointer. */ emit(rv_addi(lo(fp), RV_REG_SP, bpf_stack_adjust), ctx); emit(rv_addi(hi(fp), RV_REG_ZERO, 0), ctx); - /* Set up context pointer. */ + /* Set up BPF context pointer. */ emit(rv_addi(lo(r1), RV_REG_A0, 0), ctx); emit(rv_addi(hi(r1), RV_REG_ZERO, 0), ctx); diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index d208a9fd6c52..6cfd164cbe88 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -515,7 +515,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_LSH | BPF_X: emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_RSH | BPF_X: @@ -542,13 +542,21 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, /* dst = BSWAP##imm(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: - { - int shift = 64 - imm; - - emit(rv_slli(rd, rd, shift), ctx); - emit(rv_srli(rd, rd, shift), ctx); + switch (imm) { + case 16: + emit(rv_slli(rd, rd, 48), ctx); + emit(rv_srli(rd, rd, 48), ctx); + break; + case 32: + if (!aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case 64: + /* Do nothing */ + break; + } break; - } + case BPF_ALU | BPF_END | BPF_FROM_BE: emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx); @@ -692,19 +700,19 @@ out_be: case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_LSH | BPF_K: emit(is64 ? rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_RSH | BPF_K: emit(is64 ? 
rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_ARSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K: emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -784,11 +792,15 @@ out_be: case BPF_JMP32 | BPF_JSGE | BPF_K: case BPF_JMP | BPF_JSLE | BPF_K: case BPF_JMP32 | BPF_JSLE | BPF_K: - case BPF_JMP | BPF_JSET | BPF_K: - case BPF_JMP32 | BPF_JSET | BPF_K: rvoff = rv_offset(i, off, ctx); s = ctx->ninsns; - emit_imm(RV_REG_T1, imm, ctx); + if (imm) { + emit_imm(RV_REG_T1, imm, ctx); + rs = RV_REG_T1; + } else { + /* If imm is 0, simply use zero register. */ + rs = RV_REG_ZERO; + } if (!is64) { if (is_signed_bpf_cond(BPF_OP(code))) emit_sext_32_rd(&rd, ctx); @@ -799,16 +811,28 @@ out_be: /* Adjust for extra insns */ rvoff -= (e - s) << 2; + emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); + break; - if (BPF_OP(code) == BPF_JSET) { - /* Adjust for and */ - rvoff -= 4; - emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx); - emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, - ctx); + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + rvoff = rv_offset(i, off, ctx); + s = ctx->ninsns; + if (is_12b_int(imm)) { + emit(rv_andi(RV_REG_T1, rd, imm), ctx); } else { - emit_branch(BPF_OP(code), rd, RV_REG_T1, rvoff, ctx); + emit_imm(RV_REG_T1, imm, ctx); + emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx); } + /* For jset32, we should clear the upper 32 bits of t1, but + * sign-extension is sufficient here and saves one instruction, + * as t1 is used only in comparison against zero. + */ + if (!is64 && imm < 0) + emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx); + e = ctx->ninsns; + rvoff -= (e - s) << 2; + emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx); break; /* function call */ diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index aa738cad1338..d74a4c7d5df6 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -51,10 +51,9 @@ static struct platform_device *appldata_pdev; */ static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; static int appldata_timer_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); static int appldata_interval_handler(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table_header *appldata_sysctl_header; static struct ctl_table appldata_table[] = { @@ -217,7 +216,7 @@ static void __appldata_vtimer_setup(int cmd) */ static int appldata_timer_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int timer_active = appldata_timer_active; int rc; @@ -250,7 +249,7 @@ appldata_timer_handler(struct ctl_table *ctl, int write, */ static int appldata_interval_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int interval = appldata_interval; int rc; @@ -280,7 +279,7 @@ appldata_interval_handler(struct ctl_table *ctl, int write, */ static int appldata_generic_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; 
struct list_head *lh; diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index 7293c139dd79..ad3acb1e882b 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -36,11 +36,6 @@ struct ccwgroup_device { * @set_online: function called when device is set online * @set_offline: function called when device is set offline * @shutdown: function called when device is shut down - * @prepare: prepare for pm state transition - * @complete: undo work done in @prepare - * @freeze: callback for freezing during hibernation snapshotting - * @thaw: undo work done in @freeze - * @restore: callback for restoring after hibernation * @driver: embedded driver structure * @ccw_driver: supported ccw_driver (optional) */ @@ -50,11 +45,6 @@ struct ccwgroup_driver { int (*set_online) (struct ccwgroup_device *); int (*set_offline) (struct ccwgroup_device *); void (*shutdown)(struct ccwgroup_device *); - int (*prepare) (struct ccwgroup_device *); - void (*complete) (struct ccwgroup_device *); - int (*freeze)(struct ccwgroup_device *); - int (*thaw) (struct ccwgroup_device *); - int (*restore)(struct ccwgroup_device *); struct device_driver driver; struct ccw_driver *ccw_driver; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d6bcd34f3ec3..3d554887794e 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -375,6 +375,8 @@ struct kvm_vcpu_stat { u64 halt_poll_invalid; u64 halt_no_poll_steal; u64 halt_wakeup; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; u64 instruction_lctl; u64 instruction_lctlg; u64 instruction_stctl; @@ -971,7 +973,7 @@ struct kvm_arch_async_pf { unsigned long pfault_token; }; -bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); @@ -982,6 +984,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); +static inline void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu) {} + void kvm_arch_crypto_clear_masks(struct kvm *kvm); void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 6d321f5f101d..636446003a06 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -867,7 +867,7 @@ static int debug_active = 1; * if debug_active is already off */ static int s390dbf_procactive(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) return proc_dointvec(table, write, buffer, lenp, ppos); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 5f70cefc13e4..332b542548cd 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -594,7 +594,7 @@ static int __init topology_setup(char *str) early_param("topology", topology_setup); static int topology_ctl_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int enabled = topology_is_enabled(); int new_mode; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index bfb481134994..a4d4ca2769bd 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ 
-3082,7 +3082,7 @@ static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer) __airqs_kick_single_vcpu(kvm, pending_mask); hrtimer_forward_now(timer, ns_to_ktime(gi->expires)); return HRTIMER_RESTART; - }; + } return HRTIMER_NORESTART; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d05bb040fd42..06bde4bad205 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -57,110 +57,109 @@ #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ (KVM_MAX_VCPUS + LOCAL_IRQS)) -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM - struct kvm_stats_debugfs_item debugfs_entries[] = { - { "userspace_handled", VCPU_STAT(exit_userspace) }, - { "exit_null", VCPU_STAT(exit_null) }, - { "exit_validity", VCPU_STAT(exit_validity) }, - { "exit_stop_request", VCPU_STAT(exit_stop_request) }, - { "exit_external_request", VCPU_STAT(exit_external_request) }, - { "exit_io_request", VCPU_STAT(exit_io_request) }, - { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, - { "exit_instruction", VCPU_STAT(exit_instruction) }, - { "exit_pei", VCPU_STAT(exit_pei) }, - { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, - { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, - { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, - { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, - { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, - { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, - { "instruction_lctl", VCPU_STAT(instruction_lctl) }, - { "instruction_stctl", VCPU_STAT(instruction_stctl) }, - { "instruction_stctg", VCPU_STAT(instruction_stctg) }, - { "deliver_ckc", VCPU_STAT(deliver_ckc) }, - { "deliver_cputm", VCPU_STAT(deliver_cputm) }, - { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, - { "deliver_external_call", VCPU_STAT(deliver_external_call) }, - { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, - { "deliver_virtio", VCPU_STAT(deliver_virtio) }, - { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, - { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, - { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, - { "deliver_program", VCPU_STAT(deliver_program) }, - { "deliver_io", VCPU_STAT(deliver_io) }, - { "deliver_machine_check", VCPU_STAT(deliver_machine_check) }, - { "exit_wait_state", VCPU_STAT(exit_wait_state) }, - { "inject_ckc", VCPU_STAT(inject_ckc) }, - { "inject_cputm", VCPU_STAT(inject_cputm) }, - { "inject_external_call", VCPU_STAT(inject_external_call) }, - { "inject_float_mchk", VM_STAT(inject_float_mchk) }, - { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) }, - { "inject_io", VM_STAT(inject_io) }, - { "inject_mchk", VCPU_STAT(inject_mchk) }, - { "inject_pfault_done", VM_STAT(inject_pfault_done) }, - { "inject_program", VCPU_STAT(inject_program) }, - { "inject_restart", VCPU_STAT(inject_restart) }, - { "inject_service_signal", VM_STAT(inject_service_signal) }, - { "inject_set_prefix", VCPU_STAT(inject_set_prefix) }, - { "inject_stop_signal", VCPU_STAT(inject_stop_signal) }, - { "inject_pfault_init", VCPU_STAT(inject_pfault_init) }, - { "inject_virtio", VM_STAT(inject_virtio) }, - { "instruction_epsw", VCPU_STAT(instruction_epsw) }, - { 
"instruction_gs", VCPU_STAT(instruction_gs) }, - { "instruction_io_other", VCPU_STAT(instruction_io_other) }, - { "instruction_lpsw", VCPU_STAT(instruction_lpsw) }, - { "instruction_lpswe", VCPU_STAT(instruction_lpswe) }, - { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, - { "instruction_ptff", VCPU_STAT(instruction_ptff) }, - { "instruction_stidp", VCPU_STAT(instruction_stidp) }, - { "instruction_sck", VCPU_STAT(instruction_sck) }, - { "instruction_sckpf", VCPU_STAT(instruction_sckpf) }, - { "instruction_spx", VCPU_STAT(instruction_spx) }, - { "instruction_stpx", VCPU_STAT(instruction_stpx) }, - { "instruction_stap", VCPU_STAT(instruction_stap) }, - { "instruction_iske", VCPU_STAT(instruction_iske) }, - { "instruction_ri", VCPU_STAT(instruction_ri) }, - { "instruction_rrbe", VCPU_STAT(instruction_rrbe) }, - { "instruction_sske", VCPU_STAT(instruction_sske) }, - { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, - { "instruction_essa", VCPU_STAT(instruction_essa) }, - { "instruction_stsi", VCPU_STAT(instruction_stsi) }, - { "instruction_stfl", VCPU_STAT(instruction_stfl) }, - { "instruction_tb", VCPU_STAT(instruction_tb) }, - { "instruction_tpi", VCPU_STAT(instruction_tpi) }, - { "instruction_tprot", VCPU_STAT(instruction_tprot) }, - { "instruction_tsch", VCPU_STAT(instruction_tsch) }, - { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, - { "instruction_sie", VCPU_STAT(instruction_sie) }, - { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, - { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, - { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, - { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, - { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, - { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, - { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, - { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, - { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, - { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) }, - { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, - { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, - { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, - { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, - { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, - { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, - { "instruction_diag_10", VCPU_STAT(diagnose_10) }, - { "instruction_diag_44", VCPU_STAT(diagnose_44) }, - { "instruction_diag_9c", VCPU_STAT(diagnose_9c) }, - { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) }, - { "instruction_diag_258", VCPU_STAT(diagnose_258) }, - { "instruction_diag_308", VCPU_STAT(diagnose_308) }, - { "instruction_diag_500", VCPU_STAT(diagnose_500) }, - { "instruction_diag_other", VCPU_STAT(diagnose_other) }, + VCPU_STAT("userspace_handled", exit_userspace), + VCPU_STAT("exit_null", exit_null), + VCPU_STAT("exit_validity", exit_validity), + VCPU_STAT("exit_stop_request", exit_stop_request), + VCPU_STAT("exit_external_request", exit_external_request), + VCPU_STAT("exit_io_request", exit_io_request), + VCPU_STAT("exit_external_interrupt", exit_external_interrupt), + VCPU_STAT("exit_instruction", exit_instruction), + VCPU_STAT("exit_pei", 
exit_pei), + VCPU_STAT("exit_program_interruption", exit_program_interruption), + VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program), + VCPU_STAT("exit_operation_exception", exit_operation_exception), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VCPU_STAT("instruction_lctlg", instruction_lctlg), + VCPU_STAT("instruction_lctl", instruction_lctl), + VCPU_STAT("instruction_stctl", instruction_stctl), + VCPU_STAT("instruction_stctg", instruction_stctg), + VCPU_STAT("deliver_ckc", deliver_ckc), + VCPU_STAT("deliver_cputm", deliver_cputm), + VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal), + VCPU_STAT("deliver_external_call", deliver_external_call), + VCPU_STAT("deliver_service_signal", deliver_service_signal), + VCPU_STAT("deliver_virtio", deliver_virtio), + VCPU_STAT("deliver_stop_signal", deliver_stop_signal), + VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal), + VCPU_STAT("deliver_restart_signal", deliver_restart_signal), + VCPU_STAT("deliver_program", deliver_program), + VCPU_STAT("deliver_io", deliver_io), + VCPU_STAT("deliver_machine_check", deliver_machine_check), + VCPU_STAT("exit_wait_state", exit_wait_state), + VCPU_STAT("inject_ckc", inject_ckc), + VCPU_STAT("inject_cputm", inject_cputm), + VCPU_STAT("inject_external_call", inject_external_call), + VM_STAT("inject_float_mchk", inject_float_mchk), + VCPU_STAT("inject_emergency_signal", inject_emergency_signal), + VM_STAT("inject_io", inject_io), + VCPU_STAT("inject_mchk", inject_mchk), + VM_STAT("inject_pfault_done", inject_pfault_done), + VCPU_STAT("inject_program", inject_program), + VCPU_STAT("inject_restart", inject_restart), + VM_STAT("inject_service_signal", inject_service_signal), + VCPU_STAT("inject_set_prefix", inject_set_prefix), + VCPU_STAT("inject_stop_signal", inject_stop_signal), + VCPU_STAT("inject_pfault_init", inject_pfault_init), + VM_STAT("inject_virtio", inject_virtio), + VCPU_STAT("instruction_epsw", instruction_epsw), + VCPU_STAT("instruction_gs", instruction_gs), + VCPU_STAT("instruction_io_other", instruction_io_other), + VCPU_STAT("instruction_lpsw", instruction_lpsw), + VCPU_STAT("instruction_lpswe", instruction_lpswe), + VCPU_STAT("instruction_pfmf", instruction_pfmf), + VCPU_STAT("instruction_ptff", instruction_ptff), + VCPU_STAT("instruction_stidp", instruction_stidp), + VCPU_STAT("instruction_sck", instruction_sck), + VCPU_STAT("instruction_sckpf", instruction_sckpf), + VCPU_STAT("instruction_spx", instruction_spx), + VCPU_STAT("instruction_stpx", instruction_stpx), + VCPU_STAT("instruction_stap", instruction_stap), + VCPU_STAT("instruction_iske", instruction_iske), + VCPU_STAT("instruction_ri", instruction_ri), + VCPU_STAT("instruction_rrbe", instruction_rrbe), + VCPU_STAT("instruction_sske", instruction_sske), + VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock), + VCPU_STAT("instruction_essa", instruction_essa), + VCPU_STAT("instruction_stsi", instruction_stsi), + VCPU_STAT("instruction_stfl", instruction_stfl), + VCPU_STAT("instruction_tb", instruction_tb), + VCPU_STAT("instruction_tpi", instruction_tpi), + VCPU_STAT("instruction_tprot", instruction_tprot), + VCPU_STAT("instruction_tsch", instruction_tsch), + 
VCPU_STAT("instruction_sthyi", instruction_sthyi), + VCPU_STAT("instruction_sie", instruction_sie), + VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense), + VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running), + VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call), + VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency), + VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency), + VCPU_STAT("instruction_sigp_start", instruction_sigp_start), + VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop), + VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status), + VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status), + VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status), + VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch), + VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix), + VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart), + VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset), + VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset), + VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown), + VCPU_STAT("instruction_diag_10", diagnose_10), + VCPU_STAT("instruction_diag_44", diagnose_44), + VCPU_STAT("instruction_diag_9c", diagnose_9c), + VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored), + VCPU_STAT("instruction_diag_258", diagnose_258), + VCPU_STAT("instruction_diag_308", diagnose_308), + VCPU_STAT("instruction_diag_500", diagnose_500), + VCPU_STAT("instruction_diag_other", diagnose_other), { NULL } }; @@ -3944,7 +3943,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, /* s390 will always inject the page directly */ } -bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) { /* * s390 will always inject the page directly, @@ -4337,8 +4336,9 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) store_regs_fmt2(vcpu, kvm_run); } -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { + struct kvm_run *kvm_run = vcpu->run; int rc; if (kvm_run->immediate_exit) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 4f6c22d72072..ef05b4e167fb 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1000,8 +1000,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) handle_last_fault(vcpu, vsie_page); - if (need_resched()) - schedule(); if (test_cpu_flag(CIF_MCCK_PENDING)) s390_handle_mcck(); @@ -1185,6 +1183,7 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) kvm_s390_vcpu_has_irq(vcpu, 0) || kvm_s390_vcpu_sie_inhibited(vcpu)) break; + cond_resched(); } if (rc == -EFAULT) { diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index ae989b740376..36bce727897b 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -245,7 +245,7 @@ static int cmm_skip_blanks(char *cp, char **endp) } static int cmm_pages_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { long nr = cmm_get_pages(); struct ctl_table ctl_entry = { @@ -264,7 +264,7 @@ static int cmm_pages_handler(struct ctl_table *ctl, int write, } static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, + void 
*buffer, size_t *lenp, loff_t *ppos) { long nr = cmm_get_timed_pages(); @@ -284,7 +284,7 @@ static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, } static int cmm_timeout_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; long nr, seconds; @@ -297,8 +297,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, if (write) { len = min(*lenp, sizeof(buf)); - if (copy_from_user(buf, buffer, len)) - return -EFAULT; + memcpy(buf, buffer, len); buf[len - 1] = '\0'; cmm_skip_blanks(buf, &p); nr = simple_strtoul(p, &p, 0); @@ -311,8 +310,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, cmm_timeout_pages, cmm_timeout_seconds); if (len > *lenp) len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; + memcpy(buffer, buf, len); *lenp = len; *ppos += len; } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 1a95d8809cc3..4b6903fbba4a 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -788,19 +788,19 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { const int asce_type = gmap->asce & _ASCE_TYPE_MASK; - unsigned long *table; + unsigned long *table = gmap->table; - if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4)) - return NULL; if (gmap_is_shadow(gmap) && gmap->removed) return NULL; + if (WARN_ON_ONCE(level > (asce_type >> 2) + 1)) + return NULL; + if (asce_type != _ASCE_TYPE_REGION1 && gaddr & (-1UL << (31 + (asce_type >> 2) * 11))) return NULL; - table = gmap->table; - switch (gmap->asce & _ASCE_TYPE_MASK) { + switch (asce_type) { case _ASCE_TYPE_REGION1: table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; if (level == 4) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 8d2134136290..f4242b894cf2 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -503,7 +503,8 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) } else { /* j tail_call_start: NOP if no tail calls are used */ EMIT4_PCREL(0xa7f40000, 6); - _EMIT2(0); + /* bcr 0,%0 */ + EMIT2(0x0700, 0, REG_0); } /* Tail calls have to skip above initialization */ jit->tail_call_start = jit->prg; @@ -594,7 +595,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) * stack space for the large switch statement. 
*/ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, - int i, bool extra_pass) + int i, bool extra_pass, u32 stack_depth) { struct bpf_insn *insn = &fp->insnsi[i]; u32 dst_reg = insn->dst_reg; @@ -1207,7 +1208,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, */ if (jit->seen & SEEN_STACK) - off = STK_OFF_TCCNT + STK_OFF + fp->aux->stack_depth; + off = STK_OFF_TCCNT + STK_OFF + stack_depth; else off = STK_OFF_TCCNT; /* lhi %w0,1 */ @@ -1249,7 +1250,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, /* * Restore registers before calling function */ - save_restore_regs(jit, REGS_RESTORE, fp->aux->stack_depth); + save_restore_regs(jit, REGS_RESTORE, stack_depth); /* * goto *(prog->bpf_func + tail_call_start); @@ -1519,7 +1520,7 @@ static int bpf_set_addr(struct bpf_jit *jit, int i) * Compile eBPF program into s390x code */ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, - bool extra_pass) + bool extra_pass, u32 stack_depth) { int i, insn_count, lit32_size, lit64_size; @@ -1527,18 +1528,18 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, jit->lit64 = jit->lit64_start; jit->prg = 0; - bpf_jit_prologue(jit, fp->aux->stack_depth); + bpf_jit_prologue(jit, stack_depth); if (bpf_set_addr(jit, 0) < 0) return -1; for (i = 0; i < fp->len; i += insn_count) { - insn_count = bpf_jit_insn(jit, fp, i, extra_pass); + insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth); if (insn_count < 0) return -1; /* Next instruction address */ if (bpf_set_addr(jit, i + insn_count) < 0) return -1; } - bpf_jit_epilogue(jit, fp->aux->stack_depth); + bpf_jit_epilogue(jit, stack_depth); lit32_size = jit->lit32 - jit->lit32_start; lit64_size = jit->lit64 - jit->lit64_start; @@ -1569,6 +1570,7 @@ struct s390_jit_data { */ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { + u32 stack_depth = round_up(fp->aux->stack_depth, 8); struct bpf_prog *tmp, *orig_fp = fp; struct bpf_binary_header *header; struct s390_jit_data *jit_data; @@ -1621,7 +1623,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * - 3: Calculate program size and addrs arrray */ for (pass = 1; pass <= 3; pass++) { - if (bpf_jit_prog(&jit, fp, extra_pass)) { + if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) { fp = orig_fp; goto free_addrs; } @@ -1635,7 +1637,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) goto free_addrs; } skip_init_ctx: - if (bpf_jit_prog(&jit, fp, extra_pass)) { + if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) { bpf_jit_binary_free(header); fp = orig_fp; goto free_addrs; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5534b4335802..57d1c4e36738 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,7 @@ config X86 select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FILTER_PGPROT diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 13de0db38d4e..26b8c08e2fc4 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -15,3 +15,7 @@ config AS_SHA256_NI def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1) help Supported by binutils >= 2.24 and LLVM integrated assembler +config AS_TPAUSE + def_bool $(as-instr,tpause %ecx) + help + Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7 diff --git a/arch/x86/entry/entry_32.S 
b/arch/x86/entry/entry_32.S index ac232f456396..a5eed844e948 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1691,14 +1691,6 @@ SYM_CODE_START(general_protection) jmp common_exception SYM_CODE_END(general_protection) -#ifdef CONFIG_KVM_GUEST -SYM_CODE_START(async_page_fault) - ASM_CLAC - pushl $do_async_page_fault - jmp common_exception_read_cr2 -SYM_CODE_END(async_page_fault) -#endif - SYM_CODE_START(rewind_stack_do_exit) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 64fe3d82157e..eead1e2bebd5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1202,10 +1202,6 @@ idtentry xendebug do_debug has_error_code=0 idtentry general_protection do_general_protection has_error_code=1 idtentry page_fault do_page_fault has_error_code=1 read_cr2=1 -#ifdef CONFIG_KVM_GUEST -idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1 -#endif - #ifdef CONFIG_X86_MCE idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index de9e7841f953..630891d25819 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h @@ -3,8 +3,10 @@ #define _ASM_X86_DELAY_H #include <asm-generic/delay.h> +#include <linux/init.h> -void use_tsc_delay(void); +void __init use_tsc_delay(void); +void __init use_tpause_delay(void); void use_mwaitx_delay(void); #endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 29336574d0bc..7a4d2062385c 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -11,17 +11,6 @@ #include <linux/types.h> #include <asm/page.h> - -/* - * While not explicitly listed in the TLFS, Hyper-V always runs with a page size - * of 4096. These definitions are used when communicating with Hyper-V using - * guest physical pages and guest physical page addresses, since the guest page - * size may not be 4096 on all architectures. - */ -#define HV_HYP_PAGE_SHIFT 12 -#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) -#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) - /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). @@ -39,78 +28,41 @@ #define HYPERV_CPUID_MAX 0x4000ffff /* - * Feature identification. EAX indicates which features are available - * to the partition based upon the current partition privileges. - * These are HYPERV_CPUID_FEATURES.EAX bits. + * Aliases for Group A features that have X64 in the name. + * On x86/x64 these are HYPERV_CPUID_FEATURES.EAX bits. 
*/ -/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */ -#define HV_X64_MSR_VP_RUNTIME_AVAILABLE BIT(0) -/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ -#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1) -/* - * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM - * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available - */ -#define HV_X64_MSR_SYNIC_AVAILABLE BIT(2) -/* - * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through - * HV_X64_MSR_STIMER3_COUNT) available - */ -#define HV_MSR_SYNTIMER_AVAILABLE BIT(3) -/* - * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) - * are available - */ -#define HV_X64_MSR_APIC_ACCESS_AVAILABLE BIT(4) -/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/ -#define HV_X64_MSR_HYPERCALL_AVAILABLE BIT(5) -/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/ -#define HV_X64_MSR_VP_INDEX_AVAILABLE BIT(6) -/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/ -#define HV_X64_MSR_RESET_AVAILABLE BIT(7) -/* - * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE, - * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE, - * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available - */ -#define HV_X64_MSR_STAT_PAGES_AVAILABLE BIT(8) -/* Partition reference TSC MSR is available */ -#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9) -/* Partition Guest IDLE MSR is available */ -#define HV_X64_MSR_GUEST_IDLE_AVAILABLE BIT(10) -/* - * There is a single feature flag that signifies if the partition has access - * to MSRs with local APIC and TSC frequencies. - */ -#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11) -/* AccessReenlightenmentControls privilege */ -#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13) -/* AccessTscInvariantControls privilege */ -#define HV_X64_ACCESS_TSC_INVARIANT BIT(15) +#define HV_X64_MSR_VP_RUNTIME_AVAILABLE \ + HV_MSR_VP_RUNTIME_AVAILABLE +#define HV_X64_MSR_SYNIC_AVAILABLE \ + HV_MSR_SYNIC_AVAILABLE +#define HV_X64_MSR_APIC_ACCESS_AVAILABLE \ + HV_MSR_APIC_ACCESS_AVAILABLE +#define HV_X64_MSR_HYPERCALL_AVAILABLE \ + HV_MSR_HYPERCALL_AVAILABLE +#define HV_X64_MSR_VP_INDEX_AVAILABLE \ + HV_MSR_VP_INDEX_AVAILABLE +#define HV_X64_MSR_RESET_AVAILABLE \ + HV_MSR_RESET_AVAILABLE +#define HV_X64_MSR_GUEST_IDLE_AVAILABLE \ + HV_MSR_GUEST_IDLE_AVAILABLE +#define HV_X64_ACCESS_FREQUENCY_MSRS \ + HV_ACCESS_FREQUENCY_MSRS +#define HV_X64_ACCESS_REENLIGHTENMENT \ + HV_ACCESS_REENLIGHTENMENT +#define HV_X64_ACCESS_TSC_INVARIANT \ + HV_ACCESS_TSC_INVARIANT /* - * Feature identification: indicates which flags were specified at partition - * creation. The format is the same as the partition creation flag structure - * defined in section Partition Creation Flags. - * These are HYPERV_CPUID_FEATURES.EBX bits. + * Aliases for Group B features that have X64 in the name. + * On x86/x64 these are HYPERV_CPUID_FEATURES.EBX bits. */ -#define HV_X64_CREATE_PARTITIONS BIT(0) -#define HV_X64_ACCESS_PARTITION_ID BIT(1) -#define HV_X64_ACCESS_MEMORY_POOL BIT(2) -#define HV_X64_ADJUST_MESSAGE_BUFFERS BIT(3) -#define HV_X64_POST_MESSAGES BIT(4) -#define HV_X64_SIGNAL_EVENTS BIT(5) -#define HV_X64_CREATE_PORT BIT(6) -#define HV_X64_CONNECT_PORT BIT(7) -#define HV_X64_ACCESS_STATS BIT(8) -#define HV_X64_DEBUGGING BIT(11) -#define HV_X64_CPU_POWER_MANAGEMENT BIT(12) +#define HV_X64_POST_MESSAGES HV_POST_MESSAGES +#define HV_X64_SIGNAL_EVENTS HV_SIGNAL_EVENTS /* - * Feature identification. 
EDX indicates which miscellaneous features - * are available to the partition. - * These are HYPERV_CPUID_FEATURES.EDX bits. + * Group D Features. The bit assignments are custom to each architecture. + * On x86/x64 these are HYPERV_CPUID_FEATURES.EDX bits. */ /* The MWAIT instruction is available (per section MONITOR / MWAIT) */ #define HV_X64_MWAIT_AVAILABLE BIT(0) @@ -131,6 +83,8 @@ #define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) /* Crash MSR available */ #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) +/* Support for debug MSRs available */ +#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11) /* stimer Direct Mode is available */ #define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19) @@ -187,7 +141,7 @@ * processor, except for virtual processors that are reported as sibling SMT * threads. */ -#define HV_X64_NO_NONARCH_CORESHARING BIT(18) +#define HV_X64_NO_NONARCH_CORESHARING BIT(18) /* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ #define HV_X64_NESTED_DIRECT_FLUSH BIT(17) @@ -295,43 +249,6 @@ union hv_x64_msr_hypercall_contents { } __packed; }; -/* - * TSC page layout. - */ -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; - u64 reserved2[509]; -} __packed; - -/* - * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. - * - * - * Bit(s) - * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 - * 55:48 - Distro specific identification - * 47:16 - Linux kernel version number - * 15:0 - Distro specific identification - * - * - */ - -#define HV_LINUX_VENDOR_ID 0x8100 - struct hv_reenlightenment_control { __u64 vector:8; __u64 reserved1:8; @@ -355,31 +272,12 @@ struct hv_tsc_emulation_status { #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) -/* - * Crash notification (HV_X64_MSR_CRASH_CTL) flags. - */ -#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62) -#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63) #define HV_X64_MSR_CRASH_PARAMS \ (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) #define HV_IPI_LOW_VECTOR 0x10 #define HV_IPI_HIGH_VECTOR 0xff -/* Declare the various hypercall operations. 
*/ -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 -#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 -#define HVCALL_SEND_IPI 0x000b -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 -#define HVCALL_SEND_IPI_EX 0x0015 -#define HVCALL_POST_MESSAGE 0x005c -#define HVCALL_SIGNAL_EVENT 0x005d -#define HVCALL_RETARGET_INTERRUPT 0x007e -#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af -#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 - #define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ @@ -391,75 +289,6 @@ struct hv_tsc_emulation_status { #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 -#define HV_PROCESSOR_POWER_STATE_C0 0 -#define HV_PROCESSOR_POWER_STATE_C1 1 -#define HV_PROCESSOR_POWER_STATE_C2 2 -#define HV_PROCESSOR_POWER_STATE_C3 3 - -#define HV_FLUSH_ALL_PROCESSORS BIT(0) -#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) -#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) -#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) - -enum HV_GENERIC_SET_FORMAT { - HV_GENERIC_SET_SPARSE_4K, - HV_GENERIC_SET_ALL, -}; - -#define HV_PARTITION_ID_SELF ((u64)-1) - -#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) -#define HV_HYPERCALL_FAST_BIT BIT(16) -#define HV_HYPERCALL_VARHEAD_OFFSET 17 -#define HV_HYPERCALL_REP_COMP_OFFSET 32 -#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) -#define HV_HYPERCALL_REP_START_OFFSET 48 -#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) - -/* hypercall status code */ -#define HV_STATUS_SUCCESS 0 -#define HV_STATUS_INVALID_HYPERCALL_CODE 2 -#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 -#define HV_STATUS_INVALID_ALIGNMENT 4 -#define HV_STATUS_INVALID_PARAMETER 5 -#define HV_STATUS_INSUFFICIENT_MEMORY 11 -#define HV_STATUS_INVALID_PORT_ID 17 -#define HV_STATUS_INVALID_CONNECTION_ID 18 -#define HV_STATUS_INSUFFICIENT_BUFFERS 19 - -/* - * The Hyper-V TimeRefCount register and the TSC - * page provide a guest VM clock with 100ns tick rate - */ -#define HV_CLOCK_HZ (NSEC_PER_SEC/100) - -typedef struct _HV_REFERENCE_TSC_PAGE { - __u32 tsc_sequence; - __u32 res1; - __u64 tsc_scale; - __s64 tsc_offset; -} __packed HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; - -/* Define the number of synthetic interrupt sources. */ -#define HV_SYNIC_SINT_COUNT (16) -/* Define the expected SynIC version. */ -#define HV_SYNIC_VERSION_1 (0x1) -/* Valid SynIC vectors are 16-255. */ -#define HV_SYNIC_FIRST_VALID_VECTOR (16) - -#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) -#define HV_SYNIC_SIMP_ENABLE (1ULL << 0) -#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) -#define HV_SYNIC_SINT_MASKED (1ULL << 16) -#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) -#define HV_SYNIC_SINT_VECTOR_MASK (0xFF) - -#define HV_SYNIC_STIMER_COUNT (4) - -/* Define synthetic interrupt controller message constants. */ -#define HV_MESSAGE_SIZE (256) -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) /* Define hypervisor message types. */ enum hv_message_type { @@ -470,76 +299,25 @@ enum hv_message_type { HVMSG_GPA_INTERCEPT = 0x80000001, /* Timer notification messages. */ - HVMSG_TIMER_EXPIRED = 0x80000010, + HVMSG_TIMER_EXPIRED = 0x80000010, /* Error messages. 
*/ HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, /* Trace buffer complete messages. */ HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, /* Platform-specific processor intercept messages. */ - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, + HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, HVMSG_X64_MSR_INTERCEPT = 0x80010001, - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, - HVMSG_X64_APIC_EOI = 0x80010004, - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 -}; - -/* Define synthetic interrupt controller message flags. */ -union hv_message_flags { - __u8 asu8; - struct { - __u8 msg_pending:1; - __u8 reserved:7; - } __packed; -}; - -/* Define port identifier type. */ -union hv_port_id { - __u32 asu32; - struct { - __u32 id:24; - __u32 reserved:8; - } __packed u; + HVMSG_X64_APIC_EOI = 0x80010004, + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 }; -/* Define synthetic interrupt controller message header. */ -struct hv_message_header { - __u32 message_type; - __u8 payload_size; - union hv_message_flags message_flags; - __u8 reserved[2]; - union { - __u64 sender; - union hv_port_id port; - }; -} __packed; - -/* Define synthetic interrupt controller message format. */ -struct hv_message { - struct hv_message_header header; - union { - __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; - } u; -} __packed; - -/* Define the synthetic interrupt message page layout. */ -struct hv_message_page { - struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; -} __packed; - -/* Define timer message payload structure. */ -struct hv_timer_message_payload { - __u32 timer_index; - __u32 reserved; - __u64 expiration_time; /* When the timer expired */ - __u64 delivery_time; /* When the message was delivered */ -} __packed; - struct hv_nested_enlightenments_control { struct { __u32 directhypercall:1; @@ -767,187 +545,11 @@ struct hv_enlightened_vmcs { #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF -/* Define synthetic interrupt controller flag constants. */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) -#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long)) - -/* - * Synthetic timer configuration. - */ -union hv_stimer_config { - u64 as_uint64; - struct { - u64 enable:1; - u64 periodic:1; - u64 lazy:1; - u64 auto_enable:1; - u64 apic_vector:8; - u64 direct_mode:1; - u64 reserved_z0:3; - u64 sintx:4; - u64 reserved_z1:44; - } __packed; -}; - - -/* Define the synthetic interrupt controller event flags format. */ -union hv_synic_event_flags { - unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT]; -}; - -/* Define SynIC control register. */ -union hv_synic_scontrol { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:63; - } __packed; -}; - -/* Define synthetic interrupt source. 
*/ -union hv_synic_sint { - u64 as_uint64; - struct { - u64 vector:8; - u64 reserved1:8; - u64 masked:1; - u64 auto_eoi:1; - u64 polling:1; - u64 reserved2:45; - } __packed; -}; - -/* Define the format of the SIMP register */ -union hv_synic_simp { - u64 as_uint64; - struct { - u64 simp_enabled:1; - u64 preserved:11; - u64 base_simp_gpa:52; - } __packed; -}; - -/* Define the format of the SIEFP register */ -union hv_synic_siefp { - u64 as_uint64; - struct { - u64 siefp_enabled:1; - u64 preserved:11; - u64 base_siefp_gpa:52; - } __packed; -}; - -struct hv_vpset { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[]; -} __packed; - -/* HvCallSendSyntheticClusterIpi hypercall */ -struct hv_send_ipi { - u32 vector; - u32 reserved; - u64 cpu_mask; -} __packed; - -/* HvCallSendSyntheticClusterIpiEx hypercall */ -struct hv_send_ipi_ex { - u32 vector; - u32 reserved; - struct hv_vpset vp_set; -} __packed; - -/* HvFlushGuestPhysicalAddressSpace hypercalls */ -struct hv_guest_mapping_flush { - u64 address_space; - u64 flags; -} __packed; - -/* - * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited - * by the bitwidth of "additional_pages" in union hv_gpa_page_range. - */ -#define HV_MAX_FLUSH_PAGES (2048) - -/* HvFlushGuestPhysicalAddressList hypercall */ -union hv_gpa_page_range { - u64 address_space; - struct { - u64 additional_pages:11; - u64 largepage:1; - u64 basepfn:52; - } page; -}; - -/* - * All input flush parameters should be in single page. The max flush - * count is equal with how many entries of union hv_gpa_page_range can - * be populated into the input parameter page. - */ -#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ - sizeof(union hv_gpa_page_range)) - -struct hv_guest_mapping_flush_list { - u64 address_space; - u64 flags; - union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT]; -}; - -/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ -struct hv_tlb_flush { - u64 address_space; - u64 flags; - u64 processor_mask; - u64 gva_list[]; -} __packed; - -/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ -struct hv_tlb_flush_ex { - u64 address_space; - u64 flags; - struct hv_vpset hv_vp_set; - u64 gva_list[]; -} __packed; - struct hv_partition_assist_pg { u32 tlb_lock_count; }; -union hv_msi_entry { - u64 as_uint64; - struct { - u32 address; - u32 data; - } __packed; -}; - -struct hv_interrupt_entry { - u32 source; /* 1 for MSI(-X) */ - u32 reserved1; - union hv_msi_entry msi_entry; -} __packed; -/* - * flags for hv_device_interrupt_target.flags - */ -#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1 -#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2 - -struct hv_device_interrupt_target { - u32 vector; - u32 flags; - union { - u64 vp_mask; - struct hv_vpset vp_set; - }; -} __packed; +#include <asm-generic/hyperv-tlfs.h> -/* HvRetargetDeviceInterrupt hypercall */ -struct hv_retarget_device_interrupt { - u64 partition_id; /* use "self" */ - u64 device_id; - struct hv_interrupt_entry int_entry; - u64 reserved2; - struct hv_device_interrupt_target int_target; -} __packed __aligned(8); #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e94b3de564d6..1da5858501ca 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -83,6 +83,10 @@ #define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24) #define KVM_REQ_APICV_UPDATE \ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) +#define 
KVM_REQ_HV_TLB_FLUSH \ + KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_APF_READY KVM_ARCH_REQ(28) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -107,15 +111,8 @@ #define UNMAPPED_GVA (~(gpa_t)0) /* KVM Hugepage definitions for x86 */ -enum { - PT_PAGE_TABLE_LEVEL = 1, - PT_DIRECTORY_LEVEL = 2, - PT_PDPE_LEVEL = 3, - /* set max level to the biggest one */ - PT_MAX_HUGEPAGE_LEVEL = PT_PDPE_LEVEL, -}; -#define KVM_NR_PAGE_SIZES (PT_MAX_HUGEPAGE_LEVEL - \ - PT_PAGE_TABLE_LEVEL + 1) +#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G +#define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1) #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) @@ -124,7 +121,7 @@ enum { static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) { - /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ + /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */ return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); } @@ -164,9 +161,13 @@ enum kvm_reg { NR_VCPU_REGS, VCPU_EXREG_PDPTR = NR_VCPU_REGS, + VCPU_EXREG_CR0, VCPU_EXREG_CR3, + VCPU_EXREG_CR4, VCPU_EXREG_RFLAGS, VCPU_EXREG_SEGMENTS, + VCPU_EXREG_EXIT_INFO_1, + VCPU_EXREG_EXIT_INFO_2, }; enum { @@ -182,8 +183,10 @@ enum { enum exit_fastpath_completion { EXIT_FASTPATH_NONE, - EXIT_FASTPATH_SKIP_EMUL_INS, + EXIT_FASTPATH_REENTER_GUEST, + EXIT_FASTPATH_EXIT_HANDLED, }; +typedef enum exit_fastpath_completion fastpath_t; struct x86_emulate_ctxt; struct x86_exception; @@ -372,12 +375,12 @@ struct rsvd_bits_validate { }; struct kvm_mmu_root_info { - gpa_t cr3; + gpa_t pgd; hpa_t hpa; }; #define KVM_MMU_ROOT_INFO_INVALID \ - ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE }) + ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE }) #define KVM_MMU_NUM_PREV_ROOTS 3 @@ -403,7 +406,7 @@ struct kvm_mmu { void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte); hpa_t root_hpa; - gpa_t root_cr3; + gpa_t root_pgd; union kvm_mmu_role mmu_role; u8 root_level; u8 shadow_root_level; @@ -598,6 +601,7 @@ struct kvm_vcpu_arch { u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; + u64 perf_capabilities; /* * Paging state of the vcpu @@ -650,7 +654,6 @@ struct kvm_vcpu_arch { u64 xcr0; u64 guest_supported_xcr0; - u32 guest_xstate_size; struct kvm_pio_request pio; void *pio_data; @@ -680,6 +683,7 @@ struct kvm_vcpu_arch { struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; int maxphyaddr; + int tdp_level; /* emulate context */ @@ -703,6 +707,7 @@ struct kvm_vcpu_arch { struct gfn_to_pfn_cache cache; } st; + u64 l1_tsc_offset; u64 tsc_offset; u64 last_guest_tsc; u64 last_host_tsc; @@ -762,14 +767,17 @@ struct kvm_vcpu_arch { struct { bool halted; - gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; + gfn_t gfns[ASYNC_PF_PER_VCPU]; struct gfn_to_hva_cache data; - u64 msr_val; + u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */ + u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */ + u16 vec; u32 id; bool send_user_only; - u32 host_apf_reason; + u32 host_apf_flags; unsigned long nested_apf_token; bool delivery_as_pf_vmexit; + bool pageready_pending; } apf; /* OSVW MSRs (AMD only) */ @@ -855,6 +863,18 @@ struct kvm_apic_map { struct kvm_lapic *phys_map[]; }; +/* Hyper-V synthetic debugger (SynDbg)*/ +struct kvm_hv_syndbg { + struct { + u64 control; + u64 status; + u64 send_page; + 
u64 recv_page; + u64 pending_page; + } control; + u64 options; +}; + /* Hyper-V emulation context */ struct kvm_hv { struct mutex hv_lock; @@ -866,7 +886,7 @@ struct kvm_hv { u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; u64 hv_crash_ctl; - HV_REFERENCE_TSC_PAGE tsc_ref; + struct ms_hyperv_tsc_page tsc_ref; struct idr conn_to_evt; @@ -878,6 +898,7 @@ struct kvm_hv { atomic_t num_mismatched_vp_indexes; struct hv_partition_assist_pg *hv_pa_pg; + struct kvm_hv_syndbg hv_syndbg; }; enum kvm_irqchip_mode { @@ -1028,6 +1049,8 @@ struct kvm_vcpu_stat { u64 irq_injections; u64 nmi_injections; u64 req_event; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; }; struct x86_instruction_info; @@ -1059,7 +1082,7 @@ struct kvm_x86_ops { void (*hardware_disable)(void); void (*hardware_unsetup)(void); bool (*cpu_has_accelerated_tpr)(void); - bool (*has_emulated_msr)(int index); + bool (*has_emulated_msr)(u32 index); void (*cpuid_update)(struct kvm_vcpu *vcpu); unsigned int vm_size; @@ -1085,8 +1108,6 @@ struct kvm_x86_ops { void (*set_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); - void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); - void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); @@ -1100,7 +1121,8 @@ struct kvm_x86_ops { unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa); + void (*tlb_flush_all)(struct kvm_vcpu *vcpu); + void (*tlb_flush_current)(struct kvm_vcpu *vcpu); int (*tlb_remote_flush)(struct kvm *kvm); int (*tlb_remote_flush_with_range)(struct kvm *kvm, struct kvm_tlb_range *range); @@ -1113,7 +1135,13 @@ struct kvm_x86_ops { */ void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr); - void (*run)(struct kvm_vcpu *vcpu); + /* + * Flush any TLB entries created by the guest. Like tlb_flush_gva(), + * does not need to flush GPA->HPA mappings. 
+ */ + void (*tlb_flush_guest)(struct kvm_vcpu *vcpu); + + enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_vcpu *vcpu, enum exit_fastpath_completion exit_fastpath); int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); @@ -1126,8 +1154,8 @@ struct kvm_x86_ops { void (*set_nmi)(struct kvm_vcpu *vcpu); void (*queue_exception)(struct kvm_vcpu *vcpu); void (*cancel_injection)(struct kvm_vcpu *vcpu); - int (*interrupt_allowed)(struct kvm_vcpu *vcpu); - int (*nmi_allowed)(struct kvm_vcpu *vcpu); + int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection); + int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); @@ -1141,7 +1169,7 @@ struct kvm_x86_ops { bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); - void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); @@ -1153,7 +1181,6 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); - u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu); /* Returns actual tsc_offset set in active VMCS */ u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); @@ -1163,10 +1190,8 @@ struct kvm_x86_ops { struct x86_instruction_info *info, enum x86_intercept_stage stage, struct x86_exception *exception); - void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion *exit_fastpath); + void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); - int (*check_nested_events)(struct kvm_vcpu *vcpu); void (*request_immediate_exit)(struct kvm_vcpu *vcpu); void (*sched_in)(struct kvm_vcpu *kvm, int cpu); @@ -1199,6 +1224,7 @@ struct kvm_x86_ops { /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; + const struct kvm_x86_nested_ops *nested_ops; /* * Architecture specific hooks for vCPU blocking due to @@ -1226,18 +1252,10 @@ struct kvm_x86_ops { void (*setup_mce)(struct kvm_vcpu *vcpu); - int (*get_nested_state)(struct kvm_vcpu *vcpu, - struct kvm_nested_state __user *user_kvm_nested_state, - unsigned user_data_size); - int (*set_nested_state)(struct kvm_vcpu *vcpu, - struct kvm_nested_state __user *user_kvm_nested_state, - struct kvm_nested_state *kvm_state); - bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); - - int (*smi_allowed)(struct kvm_vcpu *vcpu); + int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); - int (*enable_smi_window)(struct kvm_vcpu *vcpu); + void (*enable_smi_window)(struct kvm_vcpu *vcpu); int (*mem_enc_op)(struct kvm *kvm, void __user *argp); int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); @@ -1245,14 +1263,28 @@ struct kvm_x86_ops { int (*get_msr_feature)(struct kvm_msr_entry *entry); - int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu, - uint16_t *vmcs_version); - uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu); - bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); int (*enable_direct_tlbflush)(struct 
kvm_vcpu *vcpu); + + void (*migrate_timers)(struct kvm_vcpu *vcpu); +}; + +struct kvm_x86_nested_ops { + int (*check_events)(struct kvm_vcpu *vcpu); + bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); + int (*get_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + unsigned user_data_size); + int (*set_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + struct kvm_nested_state *kvm_state); + bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); + + int (*enable_evmcs)(struct kvm_vcpu *vcpu, + uint16_t *vmcs_version); + uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu); }; struct kvm_x86_init_ops { @@ -1451,6 +1483,8 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long pay void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); +bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault); int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); @@ -1478,6 +1512,8 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_inject_nmi(struct kvm_vcpu *vcpu); +void kvm_update_dr7(struct kvm_vcpu *vcpu); + int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); @@ -1508,8 +1544,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); +void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gva_t gva, hpa_t root_hpa); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush); +void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush, + bool skip_mmu_sync); void kvm_configure_mmu(bool enable_tdp, int tdp_page_level); @@ -1573,8 +1612,6 @@ enum { }; #define HF_GIF_MASK (1 << 0) -#define HF_HIF_MASK (1 << 1) -#define HF_VINTR_MASK (1 << 2) #define HF_NMI_MASK (1 << 3) #define HF_IRET_MASK (1 << 4) #define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ @@ -1640,7 +1677,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); -bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); +void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu); +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 9b4df6eaa11a..57fd1966c4ea 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -88,11 +88,21 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); -void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); +void kvm_async_pf_task_wait_schedule(u32 token); void kvm_async_pf_task_wake(u32 
token); -u32 kvm_read_and_reset_pf_reason(void); -extern void kvm_disable_steal_time(void); -void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); +u32 kvm_read_and_reset_apf_flags(void); +void kvm_disable_steal_time(void); +bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); + +DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled); + +static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) +{ + if (static_branch_unlikely(&kvm_async_pf_enabled)) + return __kvm_handle_async_pf(regs, token); + else + return false; +} #ifdef CONFIG_PARAVIRT_SPINLOCKS void __init kvm_spinlock_init(void); @@ -103,7 +113,7 @@ static inline void kvm_spinlock_init(void) #endif /* CONFIG_PARAVIRT_SPINLOCKS */ #else /* CONFIG_KVM_GUEST */ -#define kvm_async_pf_task_wait(T, I) do {} while(0) +#define kvm_async_pf_task_wait_schedule(T) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) static inline bool kvm_para_available(void) @@ -121,7 +131,7 @@ static inline unsigned int kvm_arch_para_hints(void) return 0; } -static inline u32 kvm_read_and_reset_pf_reason(void) +static inline u32 kvm_read_and_reset_apf_flags(void) { return 0; } @@ -130,6 +140,11 @@ static inline void kvm_disable_steal_time(void) { return; } + +static inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) +{ + return false; +} #endif #endif /* _ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index b809f117f3f4..73d997aa2966 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -20,8 +20,10 @@ #define MWAIT_ECX_INTERRUPT_BREAK 0x1 #define MWAITX_ECX_TIMER_ENABLE BIT(1) -#define MWAITX_MAX_LOOPS ((u32)-1) +#define MWAITX_MAX_WAIT_CYCLES UINT_MAX #define MWAITX_DISABLE_CSTATES 0xf0 +#define TPAUSE_C01_STATE 1 +#define TPAUSE_C02_STATE 0 u32 get_umwait_control_msr(void); @@ -122,4 +124,24 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) current_clr_polling(); } +/* + * Caller can specify whether to enter C0.1 (low latency, less + * power saving) or C0.2 state (saves more power, but longer wakeup + * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR + * which can force requests for C0.2 to be downgraded to C0.1. 
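As a usage illustration for the __tpause() helper added just below (a sketch under stated assumptions, not code from this series): ECX selects the requested C0.x state and EDX:EAX carry a TSC deadline, so a TPAUSE-based delay could be built roughly as follows.

/* Sketch: wait roughly 'cycles' TSC ticks using TPAUSE in C0.2.
 * A real implementation would loop until the deadline is reached,
 * since TPAUSE also returns early on interrupts. */
static void tpause_delay(u64 cycles)
{
	u64 until = rdtsc() + cycles;

	__tpause(TPAUSE_C02_STATE, upper_32_bits(until), lower_32_bits(until));
}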
+ */ +static inline void __tpause(u32 ecx, u32 edx, u32 eax) +{ + /* "tpause %ecx, %edx, %eax;" */ + #ifdef CONFIG_AS_TPAUSE + asm volatile("tpause %%ecx\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); + #else + asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); + #endif +} + #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 6ece8561ba66..8a1f5382a4ea 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -96,7 +96,6 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u8 reserved_6[8]; /* Offset 0xe8 */ u64 avic_logical_id; /* Offset 0xf0 */ u64 avic_physical_id; /* Offset 0xf8 */ - u8 reserved_7[768]; }; @@ -203,8 +202,16 @@ struct __attribute__ ((__packed__)) vmcb_save_area { u64 last_excp_to; }; + +static inline void __unused_size_checks(void) +{ + BUILD_BUG_ON(sizeof(struct vmcb_save_area) != 0x298); + BUILD_BUG_ON(sizeof(struct vmcb_control_area) != 256); +} + struct __attribute__ ((__packed__)) vmcb { struct vmcb_control_area control; + u8 reserved_control[1024 - sizeof(struct vmcb_control_area)]; struct vmcb_save_area save; }; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 5e090d1f03f8..cd7de4b401fe 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -527,10 +527,12 @@ struct vmx_msr_entry { /* * Exit Qualifications for entry failure during or after loading guest state */ -#define ENTRY_FAIL_DEFAULT 0 -#define ENTRY_FAIL_PDPTE 2 -#define ENTRY_FAIL_NMI 3 -#define ENTRY_FAIL_VMCS_LINK_PTR 4 +enum vm_entry_failure_code { + ENTRY_FAIL_DEFAULT = 0, + ENTRY_FAIL_PDPTE = 2, + ENTRY_FAIL_NMI = 3, + ENTRY_FAIL_VMCS_LINK_PTR = 4, +}; /* * Exit Qualifications for EPT Violations diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 96d9cd208610..6807153c0410 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -50,14 +50,12 @@ struct x86_init_resources { * @pre_vector_init: init code to run before interrupt vectors * are set up. 
* @intr_init: interrupt init code - * @trap_init: platform specific trap setup * @intr_mode_select: interrupt delivery mode selection * @intr_mode_init: interrupt delivery mode setup */ struct x86_init_irqs { void (*pre_vector_init)(void); void (*intr_init)(void); - void (*trap_init)(void); void (*intr_mode_select)(void); void (*intr_mode_init)(void); }; diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 3f3f780c8c65..17c5a038f42d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -385,32 +385,48 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_STATE_NESTED_FORMAT_VMX 0 -#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */ +#define KVM_STATE_NESTED_FORMAT_SVM 1 #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 #define KVM_STATE_NESTED_EVMCS 0x00000004 #define KVM_STATE_NESTED_MTF_PENDING 0x00000008 +#define KVM_STATE_NESTED_GIF_SET 0x00000100 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 #define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000 +#define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000 + +#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; }; struct kvm_vmx_nested_state_hdr { + __u32 flags; __u64 vmxon_pa; __u64 vmcs12_pa; + __u64 preemption_timer_deadline; struct { __u16 flags; } smm; }; +struct kvm_svm_nested_state_data { + /* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */ + __u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE]; +}; + +struct kvm_svm_nested_state_hdr { + __u64 vmcb_pa; +}; + /* for KVM_CAP_NESTED_STATE */ struct kvm_nested_state { __u16 flags; @@ -419,6 +435,7 @@ struct kvm_nested_state { union { struct kvm_vmx_nested_state_hdr vmx; + struct kvm_svm_nested_state_hdr svm; /* Pad the header to 128 bytes. 
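For the new SVM nested-state format defined above, userspace has to reserve room for the vmcb12 blob behind the fixed header; a rough VMM-side sketch follows (buffer handling only; the vCPU fd and error strategy are assumptions, not part of this patch).

/* Userspace sketch (assumption): fetch nested SVM state for a vCPU.
 * Needs <stdlib.h>, <sys/ioctl.h> and <linux/kvm.h>. */
static struct kvm_nested_state *get_nested_state(int vcpu_fd)
{
	size_t size = sizeof(struct kvm_nested_state) +
		      KVM_STATE_NESTED_SVM_VMCB_SIZE;
	struct kvm_nested_state *state = calloc(1, size);

	if (!state)
		return NULL;
	state->size = size;	/* tell KVM how much room is available */
	if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) < 0) {
		free(state);
		return NULL;
	}
	return state;
}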
*/ __u8 pad[120]; @@ -431,6 +448,7 @@ struct kvm_nested_state { */ union { struct kvm_vmx_nested_state_data vmx[0]; + struct kvm_svm_nested_state_data svm[0]; } data; }; diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 2a8e0b6b9805..812e9b4c1114 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -31,6 +31,7 @@ #define KVM_FEATURE_PV_SEND_IPI 11 #define KVM_FEATURE_POLL_CONTROL 12 #define KVM_FEATURE_PV_SCHED_YIELD 13 +#define KVM_FEATURE_ASYNC_PF_INT 14 #define KVM_HINTS_REALTIME 0 @@ -50,6 +51,8 @@ #define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_PV_EOI_EN 0x4b564d04 #define MSR_KVM_POLL_CONTROL 0x4b564d05 +#define MSR_KVM_ASYNC_PF_INT 0x4b564d06 +#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 struct kvm_steal_time { __u64 steal; @@ -81,6 +84,11 @@ struct kvm_clock_pairing { #define KVM_ASYNC_PF_ENABLED (1 << 0) #define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) #define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2) +#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) + +/* MSR_KVM_ASYNC_PF_INT */ +#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) + /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 @@ -112,8 +120,13 @@ struct kvm_mmu_op_release_pt { #define KVM_PV_REASON_PAGE_READY 2 struct kvm_vcpu_pv_apf_data { - __u32 reason; - __u8 pad[60]; + /* Used for 'page not present' events delivered via #PF */ + __u32 flags; + + /* Used for 'page ready' events delivered via interrupt notification */ + __u32 token; + + __u8 pad[56]; __u32 enabled; }; diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index e95b72ec19bc..b8ff9e8ac0d5 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -150,6 +150,9 @@ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ { EXIT_REASON_TPAUSE, "TPAUSE" } +#define VMX_EXIT_REASON_FLAGS \ + { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } + #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 1cb3ca9bba49..1afbdd1dd777 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -39,8 +39,7 @@ static bool __read_mostly sched_itmt_capable; unsigned int __read_mostly sysctl_sched_itmt_enabled; static int sched_itmt_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { unsigned int old_sysctl; int ret; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 6efe0410fb72..d6f22a3a1f7d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -35,6 +35,8 @@ #include <asm/tlb.h> #include <asm/cpuidle_haltpoll.h> +DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled); + static int kvmapf = 1; static int __init parse_no_kvmapf(char *arg) @@ -73,7 +75,6 @@ struct kvm_task_sleep_node { struct swait_queue_head wq; u32 token; int cpu; - bool halted; }; static struct kvm_task_sleep_head { @@ -96,77 +97,64 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, return NULL; } -/* - * @interrupt_kernel: Is this called from a routine which interrupts the kernel - * (other than user space)? 
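The new ABI bits above let 'page ready' events arrive as an interrupt instead of a second #PF. A hedged guest-side sketch of opting in (the chosen vector is purely illustrative, and 'pa' stands for the per-CPU apf_reason address as set up in kvm_guest_cpu_init):

/* Guest-side sketch (assumption): enable interrupt-delivered 'page ready'
 * notifications when the host advertises KVM_FEATURE_ASYNC_PF_INT. */
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT)) {
	wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
	pa |= KVM_ASYNC_PF_DELIVERY_AS_INT;
}
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
/* After consuming a 'page ready' token, the guest acks via MSR_KVM_ASYNC_PF_ACK. */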
- */ -void kvm_async_pf_task_wait(u32 token, int interrupt_kernel) +static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; - struct kvm_task_sleep_node n, *e; - DECLARE_SWAITQUEUE(wait); - - rcu_irq_enter(); + struct kvm_task_sleep_node *e; raw_spin_lock(&b->lock); e = _find_apf_task(b, token); if (e) { /* dummy entry exist -> wake up was delivered ahead of PF */ hlist_del(&e->link); - kfree(e); raw_spin_unlock(&b->lock); - - rcu_irq_exit(); - return; + kfree(e); + return false; } - n.token = token; - n.cpu = smp_processor_id(); - n.halted = is_idle_task(current) || - (IS_ENABLED(CONFIG_PREEMPT_COUNT) - ? preempt_count() > 1 || rcu_preempt_depth() - : interrupt_kernel); - init_swait_queue_head(&n.wq); - hlist_add_head(&n.link, &b->list); + n->token = token; + n->cpu = smp_processor_id(); + init_swait_queue_head(&n->wq); + hlist_add_head(&n->link, &b->list); raw_spin_unlock(&b->lock); + return true; +} + +/* + * kvm_async_pf_task_wait_schedule - Wait for pagefault to be handled + * @token: Token to identify the sleep node entry + * + * Invoked from the async pagefault handling code or from the VM exit page + * fault handler. In both cases RCU is watching. + */ +void kvm_async_pf_task_wait_schedule(u32 token) +{ + struct kvm_task_sleep_node n; + DECLARE_SWAITQUEUE(wait); + + lockdep_assert_irqs_disabled(); + + if (!kvm_async_pf_queue_task(token, &n)) + return; for (;;) { - if (!n.halted) - prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE); if (hlist_unhashed(&n.link)) break; - rcu_irq_exit(); - - if (!n.halted) { - local_irq_enable(); - schedule(); - local_irq_disable(); - } else { - /* - * We cannot reschedule. So halt. 
- */ - native_safe_halt(); - local_irq_disable(); - } - - rcu_irq_enter(); + local_irq_enable(); + schedule(); + local_irq_disable(); } - if (!n.halted) - finish_swait(&n.wq, &wait); - - rcu_irq_exit(); - return; + finish_swait(&n.wq, &wait); } -EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); +EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule); static void apf_task_wake_one(struct kvm_task_sleep_node *n) { hlist_del_init(&n->link); - if (n->halted) - smp_send_reschedule(n->cpu); - else if (swq_has_sleeper(&n->wq)) + if (swq_has_sleeper(&n->wq)) swake_up_one(&n->wq); } @@ -175,12 +163,13 @@ static void apf_task_wake_all(void) int i; for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) { - struct hlist_node *p, *next; struct kvm_task_sleep_head *b = &async_pf_sleepers[i]; + struct kvm_task_sleep_node *n; + struct hlist_node *p, *next; + raw_spin_lock(&b->lock); hlist_for_each_safe(p, next, &b->list) { - struct kvm_task_sleep_node *n = - hlist_entry(p, typeof(*n), link); + n = hlist_entry(p, typeof(*n), link); if (n->cpu == smp_processor_id()) apf_task_wake_one(n); } @@ -221,46 +210,61 @@ again: n->cpu = smp_processor_id(); init_swait_queue_head(&n->wq); hlist_add_head(&n->link, &b->list); - } else + } else { apf_task_wake_one(n); + } raw_spin_unlock(&b->lock); return; } EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); -u32 kvm_read_and_reset_pf_reason(void) +u32 kvm_read_and_reset_apf_flags(void) { - u32 reason = 0; + u32 flags = 0; if (__this_cpu_read(apf_reason.enabled)) { - reason = __this_cpu_read(apf_reason.reason); - __this_cpu_write(apf_reason.reason, 0); + flags = __this_cpu_read(apf_reason.flags); + __this_cpu_write(apf_reason.flags, 0); } - return reason; + return flags; } -EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); -NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason); +EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags); +NOKPROBE_SYMBOL(kvm_read_and_reset_apf_flags); -dotraplinkage void -do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) +bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) { - switch (kvm_read_and_reset_pf_reason()) { - default: - do_page_fault(regs, error_code, address); - break; + u32 reason = kvm_read_and_reset_apf_flags(); + + switch (reason) { case KVM_PV_REASON_PAGE_NOT_PRESENT: - /* page is swapped out by the host. */ - kvm_async_pf_task_wait((u32)address, !user_mode(regs)); - break; case KVM_PV_REASON_PAGE_READY: + break; + default: + return false; + } + + /* + * If the host managed to inject an async #PF into an interrupt + * disabled region, then die hard as this is not going to end well + * and the host side is seriously broken. + */ + if (unlikely(!(regs->flags & X86_EFLAGS_IF))) + panic("Host injected async #PF in interrupt disabled region\n"); + + if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) { + if (unlikely(!(user_mode(regs)))) + panic("Host injected async #PF in kernel mode\n"); + /* Page is swapped out by the host. 
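With the trap-gate based injection gone, the static-key wrapper from kvm_para.h is meant to be called from the regular guest #PF path; that call site is not part of this hunk, so the sketch below is an assumption about its shape rather than the actual fault.c change.

/* Sketch (assumed call site): early-out when the 'fault' is an async-PF token. */
dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
				 unsigned long address)
{
	if (kvm_handle_async_pf(regs, (u32)address))
		return;

	/* ... normal page fault handling continues ... */
}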
*/ + kvm_async_pf_task_wait_schedule(token); + } else { rcu_irq_enter(); - kvm_async_pf_task_wake((u32)address); + kvm_async_pf_task_wake(token); rcu_irq_exit(); - break; } + return true; } -NOKPROBE_SYMBOL(do_async_page_fault); +NOKPROBE_SYMBOL(__kvm_handle_async_pf); static void __init paravirt_ops_setup(void) { @@ -306,11 +310,11 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val) static void kvm_guest_cpu_init(void) { if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { - u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); + u64 pa; -#ifdef CONFIG_PREEMPTION - pa |= KVM_ASYNC_PF_SEND_ALWAYS; -#endif + WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled)); + + pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); pa |= KVM_ASYNC_PF_ENABLED; if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT)) @@ -318,12 +322,12 @@ static void kvm_guest_cpu_init(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(apf_reason.enabled, 1); - printk(KERN_INFO"KVM setup async PF for cpu %d\n", - smp_processor_id()); + pr_info("KVM setup async PF for cpu %d\n", smp_processor_id()); } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { unsigned long pa; + /* Size alignment is implied but just to make it explicit. */ BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); __this_cpu_write(kvm_apic_eoi, 0); @@ -344,8 +348,7 @@ static void kvm_pv_disable_apf(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); __this_cpu_write(apf_reason.enabled, 0); - printk(KERN_INFO"Unregister pv shared memory for cpu %d\n", - smp_processor_id()); + pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id()); } static void kvm_pv_guest_cpu_reboot(void *unused) @@ -592,12 +595,6 @@ static int kvm_cpu_down_prepare(unsigned int cpu) } #endif -static void __init kvm_apf_trap_init(void) -{ - update_intr_gate(X86_TRAP_PF, async_page_fault); -} - - static void kvm_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info) { @@ -632,8 +629,6 @@ static void __init kvm_guest_init(void) register_reboot_notifier(&kvm_pv_reboot_nb); for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) raw_spin_lock_init(&async_pf_sleepers[i].lock); - if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) - x86_init.irqs.trap_init = kvm_apf_trap_init; if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { has_steal_clock = 1; @@ -649,6 +644,9 @@ static void __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) apic_set_eoi_write(kvm_guest_apic_eoi_write); + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) + static_branch_enable(&kvm_async_pf_enabled); + #ifdef CONFIG_SMP smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 106e7f87f534..371a6b348e44 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -103,6 +103,9 @@ static __init void x86_late_time_init(void) */ x86_init.irqs.intr_mode_init(); tsc_init(); + + if (static_cpu_has(X86_FEATURE_WAITPKG)) + use_tpause_delay(); } /* diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 428186d9de46..4cc541051994 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -903,7 +903,5 @@ void __init trap_init(void) idt_setup_ist_traps(); - x86_init.irqs.trap_init(); - idt_setup_debugidt_traps(); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index fdd4c1078632..49d925043171 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -41,6 +41,7 @@ EXPORT_SYMBOL(tsc_khz); * TSC can be 
unstable due to cpufreq or due to unsynced TSCs */ static int __read_mostly tsc_unstable; +static unsigned int __initdata tsc_early_khz; static DEFINE_STATIC_KEY_FALSE(__use_tsc); @@ -59,6 +60,12 @@ struct cyc2ns { static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); +static int __init tsc_early_khz_setup(char *buf) +{ + return kstrtouint(buf, 0, &tsc_early_khz); +} +early_param("tsc_early_khz", tsc_early_khz_setup); + __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data) { int seq, idx; @@ -1412,7 +1419,10 @@ static bool __init determine_cpu_tsc_frequencies(bool early) if (early) { cpu_khz = x86_platform.calibrate_cpu(); - tsc_khz = x86_platform.calibrate_tsc(); + if (tsc_early_khz) + tsc_khz = tsc_early_khz; + else + tsc_khz = x86_platform.calibrate_tsc(); } else { /* We should not be here with non-native cpu calibration */ WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 85f1a90c55cd..123f1c1f1788 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -79,7 +79,6 @@ struct x86_init_ops x86_init __initdata = { .irqs = { .pre_vector_init = init_ISA_irqs, .intr_init = native_init_IRQ, - .trap_init = x86_init_noop, .intr_mode_select = apic_intr_mode_select, .intr_mode_init = apic_intr_mode_init }, diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 901cd1fdecd9..253b8e875ccd 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -86,12 +86,10 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) best = kvm_find_cpuid_entry(vcpu, 0xD, 0); if (!best) { vcpu->arch.guest_supported_xcr0 = 0; - vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; } else { vcpu->arch.guest_supported_xcr0 = (best->eax | ((u64)best->edx << 32)) & supported_xcr0; - vcpu->arch.guest_xstate_size = best->ebx = - xstate_required_size(vcpu->arch.xcr0, false); + best->ebx = xstate_required_size(vcpu->arch.xcr0, false); } best = kvm_find_cpuid_entry(vcpu, 0xD, 1); @@ -124,8 +122,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) MSR_IA32_MISC_ENABLE_MWAIT); } - /* Update physical-address width */ + /* Note, maxphyaddr must be updated before tdp_level. */ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu); kvm_mmu_reset_context(vcpu); kvm_pmu_refresh(vcpu); @@ -297,7 +296,7 @@ void kvm_set_cpu_caps(void) F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | 0 /* DS-CPL, VMX, SMX, EST */ | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | - F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | + F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) | F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | @@ -712,7 +711,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | (1 << KVM_FEATURE_PV_SEND_IPI) | (1 << KVM_FEATURE_POLL_CONTROL) | - (1 << KVM_FEATURE_PV_SCHED_YIELD); + (1 << KVM_FEATURE_PV_SCHED_YIELD) | + (1 << KVM_FEATURE_ASYNC_PF_INT); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); @@ -728,6 +728,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) cpuid_entry_override(entry, CPUID_8000_0001_EDX); cpuid_entry_override(entry, CPUID_8000_0001_ECX); break; + case 0x80000006: + /* L2 cache and TLB: pass through host info. 
*/ + break; case 0x80000007: /* Advanced power management */ /* invariant TSC is CPUID.80000007H:EDX[8] */ entry->edx &= (1 << 8); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 63a70f6a3df3..05434cd9342f 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -303,4 +303,9 @@ static __always_inline void kvm_cpu_cap_check_and_set(unsigned int x86_feature) kvm_cpu_cap_set(x86_feature); } +static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa) +{ + return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu)); +} + #endif diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index bddaba9c68dd..de5476f8683e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5798,6 +5798,8 @@ writeback: } ctxt->eip = ctxt->_eip; + if (ctxt->mode != X86EMUL_MODE_PROT64) + ctxt->eip = (u32)ctxt->_eip; done: if (rc == X86EMUL_PROPAGATE_FAULT) { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 54d4b98b49e1..238b78e069fe 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -21,6 +21,7 @@ #include "x86.h" #include "lapic.h" #include "ioapic.h" +#include "cpuid.h" #include "hyperv.h" #include <linux/cpu.h> @@ -266,6 +267,123 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, return ret; } +static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *entry; + + entry = kvm_find_cpuid_entry(vcpu, + HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, + 0); + if (!entry) + return false; + + return entry->eax & HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; +} + +static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_hv *hv = &kvm->arch.hyperv; + + if (vcpu->run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL) + hv->hv_syndbg.control.status = + vcpu->run->hyperv.u.syndbg.status; + return 1; +} + +static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr) +{ + struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); + struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; + + hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNDBG; + hv_vcpu->exit.u.syndbg.msr = msr; + hv_vcpu->exit.u.syndbg.control = syndbg->control.control; + hv_vcpu->exit.u.syndbg.send_page = syndbg->control.send_page; + hv_vcpu->exit.u.syndbg.recv_page = syndbg->control.recv_page; + hv_vcpu->exit.u.syndbg.pending_page = syndbg->control.pending_page; + vcpu->arch.complete_userspace_io = + kvm_hv_syndbg_complete_userspace; + + kvm_make_request(KVM_REQ_HV_EXIT, vcpu); +} + +static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) +{ + struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); + + if (!kvm_hv_is_syndbg_enabled(vcpu) && !host) + return 1; + + trace_kvm_hv_syndbg_set_msr(vcpu->vcpu_id, + vcpu_to_hv_vcpu(vcpu)->vp_index, msr, data); + switch (msr) { + case HV_X64_MSR_SYNDBG_CONTROL: + syndbg->control.control = data; + if (!host) + syndbg_exit(vcpu, msr); + break; + case HV_X64_MSR_SYNDBG_STATUS: + syndbg->control.status = data; + break; + case HV_X64_MSR_SYNDBG_SEND_BUFFER: + syndbg->control.send_page = data; + break; + case HV_X64_MSR_SYNDBG_RECV_BUFFER: + syndbg->control.recv_page = data; + break; + case HV_X64_MSR_SYNDBG_PENDING_BUFFER: + syndbg->control.pending_page = data; + if (!host) + syndbg_exit(vcpu, msr); + break; + case HV_X64_MSR_SYNDBG_OPTIONS: + syndbg->options = data; + break; + default: + break; + } + + return 0; +} + +static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) +{ + struct kvm_hv_syndbg *syndbg = 
vcpu_to_hv_syndbg(vcpu); + + if (!kvm_hv_is_syndbg_enabled(vcpu) && !host) + return 1; + + switch (msr) { + case HV_X64_MSR_SYNDBG_CONTROL: + *pdata = syndbg->control.control; + break; + case HV_X64_MSR_SYNDBG_STATUS: + *pdata = syndbg->control.status; + break; + case HV_X64_MSR_SYNDBG_SEND_BUFFER: + *pdata = syndbg->control.send_page; + break; + case HV_X64_MSR_SYNDBG_RECV_BUFFER: + *pdata = syndbg->control.recv_page; + break; + case HV_X64_MSR_SYNDBG_PENDING_BUFFER: + *pdata = syndbg->control.pending_page; + break; + case HV_X64_MSR_SYNDBG_OPTIONS: + *pdata = syndbg->options; + break; + default: + break; + } + + trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id, + vcpu_to_hv_vcpu(vcpu)->vp_index, msr, + *pdata); + + return 0; +} + static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata, bool host) { @@ -800,6 +918,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) case HV_X64_MSR_REENLIGHTENMENT_CONTROL: case HV_X64_MSR_TSC_EMULATION_CONTROL: case HV_X64_MSR_TSC_EMULATION_STATUS: + case HV_X64_MSR_SYNDBG_OPTIONS: + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: r = true; break; } @@ -900,7 +1020,7 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, * These two equivalencies are implemented in this function. */ static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, - HV_REFERENCE_TSC_PAGE *tsc_ref) + struct ms_hyperv_tsc_page *tsc_ref) { u64 max_mul; @@ -941,7 +1061,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, u64 gfn; BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); - BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0); + BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0); if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) return; @@ -1061,6 +1181,9 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, if (!host) return 1; break; + case HV_X64_MSR_SYNDBG_OPTIONS: + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: + return syndbg_set_msr(vcpu, msr, data, host); default: vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -1190,7 +1313,8 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 0; } -static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, + bool host) { u64 data = 0; struct kvm *kvm = vcpu->kvm; @@ -1227,6 +1351,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_TSC_EMULATION_STATUS: data = hv->hv_tsc_emulation_status; break; + case HV_X64_MSR_SYNDBG_OPTIONS: + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: + return syndbg_get_msr(vcpu, msr, pdata, host); default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -1316,7 +1443,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) int r; mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock); - r = kvm_hv_get_msr_pw(vcpu, msr, pdata); + r = kvm_hv_get_msr_pw(vcpu, msr, pdata, host); mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock); return r; } else @@ -1425,8 +1552,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't * analyze it here, flush TLB regardless of the specified address space. 
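On the VMM side, the syndbg_exit() path above surfaces these MSR writes as a new KVM_EXIT_HYPERV sub-type. A minimal userspace sketch, assuming the kvm_run field names match the exit data filled in above and with handle_debug_control() as a hypothetical VMM helper:

/* VMM-side sketch (assumption): react to the synthetic-debugger exit. */
static void handle_hyperv_exit(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_HYPERV ||
	    run->hyperv.type != KVM_EXIT_HYPERV_SYNDBG)
		return;

	if (run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL)
		run->hyperv.u.syndbg.status =
			handle_debug_control(run->hyperv.u.syndbg.control);
}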
*/ - kvm_make_vcpus_request_mask(kvm, - KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP, + kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, NULL, vcpu_mask, &hv_vcpu->tlb_flush); ret_success: @@ -1530,7 +1656,7 @@ ret_success: bool kvm_hv_hypercall_enabled(struct kvm *kvm) { - return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE; + return READ_ONCE(kvm->arch.hyperv.hv_guest_os_id) != 0; } static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) @@ -1709,6 +1835,34 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) } ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false); break; + case HVCALL_POST_DEBUG_DATA: + case HVCALL_RETRIEVE_DEBUG_DATA: + if (unlikely(fast)) { + ret = HV_STATUS_INVALID_PARAMETER; + break; + } + fallthrough; + case HVCALL_RESET_DEBUG_SESSION: { + struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); + + if (!kvm_hv_is_syndbg_enabled(vcpu)) { + ret = HV_STATUS_INVALID_HYPERCALL_CODE; + break; + } + + if (!(syndbg->options & HV_X64_SYNDBG_OPTION_USE_HCALLS)) { + ret = HV_STATUS_OPERATION_DENIED; + break; + } + vcpu->run->exit_reason = KVM_EXIT_HYPERV; + vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; + vcpu->run->hyperv.u.hcall.input = param; + vcpu->run->hyperv.u.hcall.params[0] = ingpa; + vcpu->run->hyperv.u.hcall.params[1] = outgpa; + vcpu->arch.complete_userspace_io = + kvm_hv_hypercall_complete_userspace; + return 0; + } default: ret = HV_STATUS_INVALID_HYPERCALL_CODE; break; @@ -1796,12 +1950,15 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, { .function = HYPERV_CPUID_FEATURES }, { .function = HYPERV_CPUID_ENLIGHTMENT_INFO }, { .function = HYPERV_CPUID_IMPLEMENT_LIMITS }, + { .function = HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS }, + { .function = HYPERV_CPUID_SYNDBG_INTERFACE }, + { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES }, { .function = HYPERV_CPUID_NESTED_FEATURES }, }; int i, nent = ARRAY_SIZE(cpuid_entries); - if (kvm_x86_ops.nested_get_evmcs_version) - evmcs_ver = kvm_x86_ops.nested_get_evmcs_version(vcpu); + if (kvm_x86_ops.nested_ops->get_evmcs_version) + evmcs_ver = kvm_x86_ops.nested_ops->get_evmcs_version(vcpu); /* Skip NESTED_FEATURES if eVMCS is not supported */ if (!evmcs_ver) @@ -1821,7 +1978,7 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS: memcpy(signature, "Linux KVM Hv", 12); - ent->eax = HYPERV_CPUID_NESTED_FEATURES; + ent->eax = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES; ent->ebx = signature[0]; ent->ecx = signature[1]; ent->edx = signature[2]; @@ -1860,6 +2017,10 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE; ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; + ent->ebx |= HV_DEBUGGING; + ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE; + ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE; + /* * Direct Synthetic timers only make sense with in-kernel * LAPIC @@ -1903,6 +2064,24 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, break; + case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS: + memcpy(signature, "Linux KVM Hv", 12); + + ent->eax = 0; + ent->ebx = signature[0]; + ent->ecx = signature[1]; + ent->edx = signature[2]; + break; + + case HYPERV_CPUID_SYNDBG_INTERFACE: + memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12); + ent->eax = signature[0]; + break; + + case HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES: + ent->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; + 
break; + default: break; } diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 757cb578101c..e68c6c2e9649 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -23,6 +23,33 @@ #include <linux/kvm_host.h> +/* + * The #defines related to the synthetic debugger are required by KDNet, but + * they are not documented in the Hyper-V TLFS because the synthetic debugger + * functionality has been deprecated and is subject to removal in future + * versions of Windows. + */ +#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080 +#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081 +#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082 + +/* + * Hyper-V synthetic debugger platform capabilities + * These are HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX bits. + */ +#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING BIT(1) + +/* Hyper-V Synthetic debug options MSR */ +#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1 +#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2 +#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3 +#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4 +#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5 +#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF + +/* Hyper-V HV_X64_MSR_SYNDBG_OPTIONS bits */ +#define HV_X64_SYNDBG_OPTION_USE_HCALLS BIT(2) + static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu) { return &vcpu->arch.hyperv; @@ -46,6 +73,11 @@ static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) return hv_vcpu_to_vcpu(container_of(synic, struct kvm_vcpu_hv, synic)); } +static inline struct kvm_hv_syndbg *vcpu_to_hv_syndbg(struct kvm_vcpu *vcpu) +{ + return &vcpu->kvm->arch.hyperv.hv_syndbg; +} + int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 2fb2e3c80724..660401700075 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -3,8 +3,8 @@ #define __KVM_IO_APIC_H #include <linux/kvm_host.h> - #include <kvm/iodev.h> +#include "irq.h" struct kvm; struct kvm_vcpu; @@ -108,11 +108,7 @@ do { \ static inline int ioapic_in_kernel(struct kvm *kvm) { - int mode = kvm->arch.irqchip_mode; - - /* Matches smp_wmb() when setting irqchip_mode */ - smp_rmb(); - return mode == KVM_IRQCHIP_KERNEL; + return irqchip_kernel(kvm); } void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index e330e7d125f7..99d118ffc67d 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -83,6 +83,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ } +EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr); /* * check if there is pending interrupt without @@ -159,6 +160,8 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu) { __kvm_migrate_apic_timer(vcpu); __kvm_migrate_pit_timer(vcpu); + if (kvm_x86_ops.migrate_timers) + kvm_x86_ops.migrate_timers(vcpu); } bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args) diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index f173ab6b407e..9b64abf9b3f1 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -16,7 +16,6 @@ #include <linux/spinlock.h> #include <kvm/iodev.h> -#include "ioapic.h" #include "lapic.h" #define PIC_NUM_PINS 16 @@ -66,15 +65,6 @@ void kvm_pic_destroy(struct kvm *kvm); int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); -static inline int 
pic_in_kernel(struct kvm *kvm) -{ - int mode = kvm->arch.irqchip_mode; - - /* Matches smp_wmb() when setting irqchip_mode */ - smp_rmb(); - return mode == KVM_IRQCHIP_KERNEL; -} - static inline int irqchip_split(struct kvm *kvm) { int mode = kvm->arch.irqchip_mode; @@ -93,6 +83,11 @@ static inline int irqchip_kernel(struct kvm *kvm) return mode == KVM_IRQCHIP_KERNEL; } +static inline int pic_in_kernel(struct kvm *kvm) +{ + return irqchip_kernel(kvm); +} + static inline int irqchip_in_kernel(struct kvm *kvm) { int mode = kvm->arch.irqchip_mode; diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 62558b9bdda7..ff2d0e9ca3bc 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -116,8 +116,9 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) { ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; - if (tmask & vcpu->arch.cr0_guest_owned_bits) - kvm_x86_ops.decache_cr0_guest_bits(vcpu); + if ((tmask & vcpu->arch.cr0_guest_owned_bits) && + !kvm_register_is_available(vcpu, VCPU_EXREG_CR0)) + kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_CR0); return vcpu->arch.cr0 & mask; } @@ -129,8 +130,9 @@ static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu) static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) { ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS; - if (tmask & vcpu->arch.cr4_guest_owned_bits) - kvm_x86_ops.decache_cr4_guest_bits(vcpu); + if ((tmask & vcpu->arch.cr4_guest_owned_bits) && + !kvm_register_is_available(vcpu, VCPU_EXREG_CR4)) + kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_CR4); return vcpu->arch.cr4 & mask; } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9af25c97612a..34a7e0533dad 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -36,6 +36,7 @@ #include <linux/jump_label.h> #include "kvm_cache_regs.h" #include "irq.h" +#include "ioapic.h" #include "trace.h" #include "x86.h" #include "cpuid.h" @@ -110,11 +111,18 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) return apic->vcpu->vcpu_id; } -bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) +static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) { return pi_inject_timer && kvm_vcpu_apicv_active(vcpu); } -EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt); + +bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu) +{ + return kvm_x86_ops.set_hv_timer + && !(kvm_mwait_in_guest(vcpu->kvm) || + kvm_can_post_timer_interrupt(vcpu)); +} +EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer); static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu) { @@ -1593,7 +1601,7 @@ static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) } } -static void apic_timer_expired(struct kvm_lapic *apic) +static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) { struct kvm_vcpu *vcpu = apic->vcpu; struct kvm_timer *ktimer = &apic->lapic_timer; @@ -1604,6 +1612,12 @@ static void apic_timer_expired(struct kvm_lapic *apic) if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) ktimer->expired_tscdeadline = ktimer->tscdeadline; + if (!from_timer_fn && vcpu->arch.apicv_active) { + WARN_ON(kvm_get_running_vcpu() != vcpu); + kvm_apic_inject_pending_timer_irqs(apic); + return; + } + if (kvm_use_posted_timer_interrupt(apic->vcpu)) { if (apic->lapic_timer.timer_advance_ns) __kvm_wait_lapic_expire(vcpu); @@ -1643,18 +1657,23 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); 
hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD); } else - apic_timer_expired(apic); + apic_timer_expired(apic, false); local_irq_restore(flags); } +static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict) +{ + return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count; +} + static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor) { ktime_t now, remaining; u64 ns_remaining_old, ns_remaining_new; - apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) - * APIC_BUS_CYCLE_NS * apic->divide_count; + apic->lapic_timer.period = + tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT)); limit_periodic_timer_frequency(apic); now = ktime_get(); @@ -1672,14 +1691,15 @@ static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_diviso apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new); } -static bool set_target_expiration(struct kvm_lapic *apic) +static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg) { ktime_t now; u64 tscl = rdtsc(); + s64 deadline; now = ktime_get(); - apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) - * APIC_BUS_CYCLE_NS * apic->divide_count; + apic->lapic_timer.period = + tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT)); if (!apic->lapic_timer.period) { apic->lapic_timer.tscdeadline = 0; @@ -1687,10 +1707,32 @@ static bool set_target_expiration(struct kvm_lapic *apic) } limit_periodic_timer_frequency(apic); + deadline = apic->lapic_timer.period; + + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { + if (unlikely(count_reg != APIC_TMICT)) { + deadline = tmict_to_ns(apic, + kvm_lapic_get_reg(apic, count_reg)); + if (unlikely(deadline <= 0)) + deadline = apic->lapic_timer.period; + else if (unlikely(deadline > apic->lapic_timer.period)) { + pr_info_ratelimited( + "kvm: vcpu %i: requested lapic timer restore with " + "starting count register %#x=%u (%lld ns) > initial count (%lld ns). 
" + "Using initial count to start timer.\n", + apic->vcpu->vcpu_id, + count_reg, + kvm_lapic_get_reg(apic, count_reg), + deadline, apic->lapic_timer.period); + kvm_lapic_set_reg(apic, count_reg, 0); + deadline = apic->lapic_timer.period; + } + } + } apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + - nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); - apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period); + nsec_to_cycles(apic->vcpu, deadline); + apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline); return true; } @@ -1723,7 +1765,7 @@ static void start_sw_period(struct kvm_lapic *apic) if (ktime_after(ktime_get(), apic->lapic_timer.target_expiration)) { - apic_timer_expired(apic); + apic_timer_expired(apic, false); if (apic_lvtt_oneshot(apic)) return; @@ -1760,7 +1802,7 @@ static bool start_hv_timer(struct kvm_lapic *apic) bool expired; WARN_ON(preemptible()); - if (!kvm_x86_ops.set_hv_timer) + if (!kvm_can_use_hv_timer(vcpu)) return false; if (!ktimer->tscdeadline) @@ -1785,7 +1827,7 @@ static bool start_hv_timer(struct kvm_lapic *apic) if (atomic_read(&ktimer->pending)) { cancel_hv_timer(apic); } else if (expired) { - apic_timer_expired(apic); + apic_timer_expired(apic, false); cancel_hv_timer(apic); } } @@ -1833,9 +1875,9 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) /* If the preempt notifier has already run, it also called apic_timer_expired */ if (!apic->lapic_timer.hv_timer_in_use) goto out; - WARN_ON(swait_active(&vcpu->wq)); + WARN_ON(rcuwait_active(&vcpu->wait)); cancel_hv_timer(apic); - apic_timer_expired(apic); + apic_timer_expired(apic, false); if (apic_lvtt_period(apic) && apic->lapic_timer.period) { advance_periodic_target_expiration(apic); @@ -1872,17 +1914,22 @@ void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu) restart_apic_timer(apic); } -static void start_apic_timer(struct kvm_lapic *apic) +static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg) { atomic_set(&apic->lapic_timer.pending, 0); if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) - && !set_target_expiration(apic)) + && !set_target_expiration(apic, count_reg)) return; restart_apic_timer(apic); } +static void start_apic_timer(struct kvm_lapic *apic) +{ + __start_apic_timer(apic, APIC_TMICT); +} + static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) { bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val); @@ -2336,7 +2383,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); - apic_timer_expired(apic); + apic_timer_expired(apic, true); if (lapic_is_periodic(apic)) { advance_periodic_target_expiration(apic); @@ -2493,6 +2540,14 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s)); + + /* + * Get calculated timer current count for remaining timer period (if + * any) and store it in the returned register set. 
+ */ + __kvm_lapic_set_reg(s->regs, APIC_TMCCT, + __apic_read(vcpu->arch.apic, APIC_TMCCT)); + return kvm_apic_state_fixup(vcpu, s, false); } @@ -2520,7 +2575,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) apic_update_lvtt(apic); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); update_divide_count(apic); - start_apic_timer(apic); + __start_apic_timer(apic, APIC_TMCCT); kvm_apic_update_apicv(vcpu); apic->highest_isr_cache = -1; if (vcpu->arch.apicv_active) { diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index a0ffb4331418..754f29beb83e 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -161,9 +161,14 @@ static inline u32 kvm_lapic_get_reg(struct kvm_lapic *apic, int reg_off) return *((u32 *) (apic->regs + reg_off)); } +static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val) +{ + *((u32 *) (regs + reg_off)) = val; +} + static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) { - *((u32 *) (apic->regs + reg_off)) = val; + __kvm_lapic_set_reg(apic->regs, reg_off, val); } extern struct static_key kvm_no_apic_vcpu; @@ -245,7 +250,7 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); -bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu); +bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu); static inline enum lapic_mode kvm_apic_mode(u64 apic_base) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 8a3b1bce722a..0ad06bfe2c2c 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -51,13 +51,13 @@ static inline u64 rsvd_bits(int s, int e) return ((1ULL << (e - s + 1)) - 1) << s; } -void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask); +void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask); void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots); -void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); +void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, bool accessed_dirty, gpa_t new_eptp); bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index fd59fee84631..fdd05c233308 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -16,6 +16,7 @@ */ #include "irq.h" +#include "ioapic.h" #include "mmu.h" #include "x86.h" #include "kvm_cache_regs.h" @@ -78,6 +79,9 @@ module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops, &nx_huge_pages_recovery_ratio, 0644); __MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint"); +static bool __read_mostly force_flush_and_sync_on_reuse; +module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644); + /* * When setting this variable to true it enables Two-Dimensional-Paging * where the hardware walks 2 page tables: @@ -244,7 +248,6 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; -static u64 __read_mostly shadow_mmio_mask; static u64 __read_mostly shadow_mmio_value; static u64 __read_mostly shadow_mmio_access_mask; static u64 __read_mostly shadow_present_mask; @@ -331,19 +334,19 @@ static 
void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, kvm_flush_remote_tlbs_with_range(kvm, &range); } -void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask) +void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask) { BUG_ON((u64)(unsigned)access_mask != access_mask); - BUG_ON((mmio_mask & mmio_value) != mmio_value); + WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len)); + WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask); shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; - shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; shadow_mmio_access_mask = access_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); static bool is_mmio_spte(u64 spte) { - return (spte & shadow_mmio_mask) == shadow_mmio_value; + return (spte & SPTE_SPECIAL_MASK) == SPTE_MMIO_MASK; } static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) @@ -566,7 +569,6 @@ static void kvm_mmu_reset_all_pte_masks(void) shadow_dirty_mask = 0; shadow_nx_mask = 0; shadow_x_mask = 0; - shadow_mmio_mask = 0; shadow_present_mask = 0; shadow_acc_track_mask = 0; @@ -583,16 +585,15 @@ static void kvm_mmu_reset_all_pte_masks(void) * the most significant bits of legal physical address space. */ shadow_nonpresent_or_rsvd_mask = 0; - low_phys_bits = boot_cpu_data.x86_cache_bits; - if (boot_cpu_data.x86_cache_bits < - 52 - shadow_nonpresent_or_rsvd_mask_len) { + low_phys_bits = boot_cpu_data.x86_phys_bits; + if (boot_cpu_has_bug(X86_BUG_L1TF) && + !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >= + 52 - shadow_nonpresent_or_rsvd_mask_len)) { + low_phys_bits = boot_cpu_data.x86_cache_bits + - shadow_nonpresent_or_rsvd_mask_len; shadow_nonpresent_or_rsvd_mask = - rsvd_bits(boot_cpu_data.x86_cache_bits - - shadow_nonpresent_or_rsvd_mask_len, - boot_cpu_data.x86_cache_bits - 1); - low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; - } else - WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF)); + rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1); + } shadow_nonpresent_or_rsvd_lower_gfn_mask = GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); @@ -620,7 +621,7 @@ static int is_large_pte(u64 pte) static int is_last_spte(u64 pte, int level) { - if (level == PT_PAGE_TABLE_LEVEL) + if (level == PG_LEVEL_4K) return 1; if (is_large_pte(pte)) return 1; @@ -1196,7 +1197,7 @@ static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot, struct kvm_lpage_info *linfo; int i; - for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { + for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { linfo = lpage_info_slot(gfn, slot, i); linfo->disallow_lpage += count; WARN_ON(linfo->disallow_lpage < 0); @@ -1225,7 +1226,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) slot = __gfn_to_memslot(slots, gfn); /* the non-leaf shadow pages are keeping readonly. 
*/ - if (sp->role.level > PT_PAGE_TABLE_LEVEL) + if (sp->role.level > PG_LEVEL_4K) return kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); @@ -1253,7 +1254,7 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) gfn = sp->gfn; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); - if (sp->role.level > PT_PAGE_TABLE_LEVEL) + if (sp->role.level > PG_LEVEL_4K) return kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); @@ -1398,7 +1399,7 @@ static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, unsigned long idx; idx = gfn_to_index(gfn, slot->base_gfn, level); - return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx]; + return &slot->arch.rmap[level - PG_LEVEL_4K][idx]; } static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, @@ -1529,8 +1530,7 @@ static void drop_spte(struct kvm *kvm, u64 *sptep) static bool __drop_large_spte(struct kvm *kvm, u64 *sptep) { if (is_large_pte(*sptep)) { - WARN_ON(page_header(__pa(sptep))->role.level == - PT_PAGE_TABLE_LEVEL); + WARN_ON(page_header(__pa(sptep))->role.level == PG_LEVEL_4K); drop_spte(kvm, sptep); --kvm->stat.lpages; return true; @@ -1682,7 +1682,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, while (mask) { rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); + PG_LEVEL_4K, slot); __rmap_write_protect(kvm, rmap_head, false); /* clear the first set bit */ @@ -1708,7 +1708,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, while (mask) { rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); + PG_LEVEL_4K, slot); __rmap_clear_dirty(kvm, rmap_head); /* clear the first set bit */ @@ -1760,7 +1760,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, int i; bool write_protected = false; - for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { + for (i = PG_LEVEL_4K; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { rmap_head = __gfn_to_rmap(gfn, i, slot); write_protected |= __rmap_write_protect(kvm, rmap_head, true); } @@ -1948,8 +1948,8 @@ static int kvm_handle_hva_range(struct kvm *kvm, gfn_start = hva_to_gfn_memslot(hva_start, memslot); gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); - for_each_slot_rmap_range(memslot, PT_PAGE_TABLE_LEVEL, - PT_MAX_HUGEPAGE_LEVEL, + for_each_slot_rmap_range(memslot, PG_LEVEL_4K, + KVM_MAX_HUGEPAGE_LEVEL, gfn_start, gfn_end - 1, &iterator) ret |= handler(kvm, iterator.rmap, memslot, @@ -2153,10 +2153,6 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu, return 0; } -static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root) -{ -} - static void nonpaging_update_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte) @@ -2313,7 +2309,7 @@ static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, return; if (local_flush) - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } #ifdef CONFIG_KVM_MMU_AUDIT @@ -2347,7 +2343,7 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, if (!s->unsync) continue; - WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); + WARN_ON(s->role.level != PG_LEVEL_4K); ret |= kvm_sync_page(vcpu, s, invalid_list); } @@ -2376,7 +2372,7 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec, int level = sp->role.level; parents->idx[level-1] = idx; - if (level == PT_PAGE_TABLE_LEVEL) + if (level == PG_LEVEL_4K) break; parents->parent[level-2] = sp; @@ -2398,7 
+2394,7 @@ static int mmu_pages_first(struct kvm_mmu_pages *pvec, sp = pvec->page[0].sp; level = sp->role.level; - WARN_ON(level == PT_PAGE_TABLE_LEVEL); + WARN_ON(level == PG_LEVEL_4K); parents->parent[level-2] = sp; @@ -2520,11 +2516,11 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, break; WARN_ON(!list_empty(&invalid_list)); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } if (sp->unsync_children) - kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); __clear_sp_write_flooding_count(sp); trace_kvm_mmu_get_page(sp, false); @@ -2546,11 +2542,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, * be inconsistent with guest page table. */ account_shadowed(vcpu->kvm, sp); - if (level == PT_PAGE_TABLE_LEVEL && - rmap_write_protect(vcpu, gfn)) + if (level == PG_LEVEL_4K && rmap_write_protect(vcpu, gfn)) kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1); - if (level > PT_PAGE_TABLE_LEVEL && need_sync) + if (level > PG_LEVEL_4K && need_sync) flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); } clear_page(sp->spt); @@ -2601,7 +2596,7 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator) { - if (iterator->level < PT_PAGE_TABLE_LEVEL) + if (iterator->level < PG_LEVEL_4K) return false; iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level); @@ -2722,7 +2717,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm, struct mmu_page_path parents; struct kvm_mmu_pages pages; - if (parent->role.level == PT_PAGE_TABLE_LEVEL) + if (parent->role.level == PG_LEVEL_4K) return 0; while (mmu_unsync_walk(parent, &pages)) { @@ -2921,7 +2916,7 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, if (sp->unsync) continue; - WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL); + WARN_ON(sp->role.level != PG_LEVEL_4K); kvm_unsync_page(vcpu, sp); } @@ -3020,7 +3015,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (!speculative) spte |= spte_shadow_accessed_mask(spte); - if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) && + if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) && is_nx_huge_page_enabled()) { pte_access &= ~ACC_EXEC_MASK; } @@ -3033,7 +3028,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (pte_access & ACC_USER_MASK) spte |= shadow_user_mask; - if (level > PT_PAGE_TABLE_LEVEL) + if (level > PG_LEVEL_4K) spte |= PT_PAGE_SIZE_MASK; if (tdp_enabled) spte |= kvm_x86_ops.get_mt_mask(vcpu, gfn, @@ -3103,8 +3098,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, * If we overwrite a PTE page pointer with a 2MB PMD, unlink * the parent of the now unreachable PTE. 
*/ - if (level > PT_PAGE_TABLE_LEVEL && - !is_large_pte(*sptep)) { + if (level > PG_LEVEL_4K && !is_large_pte(*sptep)) { struct kvm_mmu_page *child; u64 pte = *sptep; @@ -3125,7 +3119,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (set_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) { if (write_fault) ret = RET_PF_EMULATE; - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH || flush) @@ -3228,7 +3222,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) if (sp_ad_disabled(sp)) return; - if (sp->role.level > PT_PAGE_TABLE_LEVEL) + if (sp->role.level > PG_LEVEL_4K) return; __direct_pte_prefetch(vcpu, sp, sptep); @@ -3241,12 +3235,8 @@ static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn, pte_t *pte; int level; - BUILD_BUG_ON(PT_PAGE_TABLE_LEVEL != (int)PG_LEVEL_4K || - PT_DIRECTORY_LEVEL != (int)PG_LEVEL_2M || - PT_PDPE_LEVEL != (int)PG_LEVEL_1G); - if (!PageCompound(pfn_to_page(pfn)) && !kvm_is_zone_device_pfn(pfn)) - return PT_PAGE_TABLE_LEVEL; + return PG_LEVEL_4K; /* * Note, using the already-retrieved memslot and __gfn_to_hva_memslot() @@ -3260,7 +3250,7 @@ static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn, pte = lookup_address_in_mm(vcpu->kvm->mm, hva, &level); if (unlikely(!pte)) - return PT_PAGE_TABLE_LEVEL; + return PG_LEVEL_4K; return level; } @@ -3274,28 +3264,28 @@ static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t mask; int level; - if (unlikely(max_level == PT_PAGE_TABLE_LEVEL)) - return PT_PAGE_TABLE_LEVEL; + if (unlikely(max_level == PG_LEVEL_4K)) + return PG_LEVEL_4K; if (is_error_noslot_pfn(pfn) || kvm_is_reserved_pfn(pfn)) - return PT_PAGE_TABLE_LEVEL; + return PG_LEVEL_4K; slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, true); if (!slot) - return PT_PAGE_TABLE_LEVEL; + return PG_LEVEL_4K; max_level = min(max_level, max_page_level); - for ( ; max_level > PT_PAGE_TABLE_LEVEL; max_level--) { + for ( ; max_level > PG_LEVEL_4K; max_level--) { linfo = lpage_info_slot(gfn, slot, max_level); if (!linfo->disallow_lpage) break; } - if (max_level == PT_PAGE_TABLE_LEVEL) - return PT_PAGE_TABLE_LEVEL; + if (max_level == PG_LEVEL_4K) + return PG_LEVEL_4K; level = host_pfn_mapping_level(vcpu, gfn, pfn, slot); - if (level == PT_PAGE_TABLE_LEVEL) + if (level == PG_LEVEL_4K) return level; level = min(level, max_level); @@ -3317,7 +3307,7 @@ static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, int level = *levelp; u64 spte = *it.sptep; - if (it.level == level && level > PT_PAGE_TABLE_LEVEL && + if (it.level == level && level > PG_LEVEL_4K && is_nx_huge_page_enabled() && is_shadow_present_pte(spte) && !is_large_pte(spte)) { @@ -3574,7 +3564,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, * * See the comments in kvm_arch_commit_memory_region(). 
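Most of the churn in this file is the mechanical PT_*_LEVEL to PG_LEVEL_* rename. The dropped BUILD_BUG_ON above already guaranteed the two sets of constants are numerically identical, which is what makes the substitution safe; the mapping below is only a reading aid, with values assumed from arch/x86's enum pg_level rather than taken from this patch.

/*
 *   old KVM-private name      new generic name          value
 *   PT_PAGE_TABLE_LEVEL    -> PG_LEVEL_4K               1
 *   PT_DIRECTORY_LEVEL     -> PG_LEVEL_2M               2
 *   PT_PDPE_LEVEL          -> PG_LEVEL_1G               3
 *   PT_MAX_HUGEPAGE_LEVEL  -> KVM_MAX_HUGEPAGE_LEVEL    (PG_LEVEL_1G on x86, assumed)
 */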
*/ - if (sp->role.level > PT_PAGE_TABLE_LEVEL) + if (sp->role.level > PG_LEVEL_4K) break; } @@ -3666,7 +3656,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, &invalid_list); mmu->root_hpa = INVALID_PAGE; } - mmu->root_cr3 = 0; + mmu->root_pgd = 0; } kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); @@ -3686,58 +3676,64 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) return ret; } -static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) +static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva, + u8 level, bool direct) { struct kvm_mmu_page *sp; + + spin_lock(&vcpu->kvm->mmu_lock); + + if (make_mmu_pages_available(vcpu)) { + spin_unlock(&vcpu->kvm->mmu_lock); + return INVALID_PAGE; + } + sp = kvm_mmu_get_page(vcpu, gfn, gva, level, direct, ACC_ALL); + ++sp->root_count; + + spin_unlock(&vcpu->kvm->mmu_lock); + return __pa(sp->spt); +} + +static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) +{ + u8 shadow_root_level = vcpu->arch.mmu->shadow_root_level; + hpa_t root; unsigned i; - if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) { - spin_lock(&vcpu->kvm->mmu_lock); - if(make_mmu_pages_available(vcpu) < 0) { - spin_unlock(&vcpu->kvm->mmu_lock); + if (shadow_root_level >= PT64_ROOT_4LEVEL) { + root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level, true); + if (!VALID_PAGE(root)) return -ENOSPC; - } - sp = kvm_mmu_get_page(vcpu, 0, 0, - vcpu->arch.mmu->shadow_root_level, 1, ACC_ALL); - ++sp->root_count; - spin_unlock(&vcpu->kvm->mmu_lock); - vcpu->arch.mmu->root_hpa = __pa(sp->spt); - } else if (vcpu->arch.mmu->shadow_root_level == PT32E_ROOT_LEVEL) { + vcpu->arch.mmu->root_hpa = root; + } else if (shadow_root_level == PT32E_ROOT_LEVEL) { for (i = 0; i < 4; ++i) { - hpa_t root = vcpu->arch.mmu->pae_root[i]; + MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->pae_root[i])); - MMU_WARN_ON(VALID_PAGE(root)); - spin_lock(&vcpu->kvm->mmu_lock); - if (make_mmu_pages_available(vcpu) < 0) { - spin_unlock(&vcpu->kvm->mmu_lock); + root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT), + i << 30, PT32_ROOT_LEVEL, true); + if (!VALID_PAGE(root)) return -ENOSPC; - } - sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), - i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); - root = __pa(sp->spt); - ++sp->root_count; - spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu->pae_root[i] = root | PT_PRESENT_MASK; } vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); } else BUG(); - /* root_cr3 is ignored for direct MMUs. */ - vcpu->arch.mmu->root_cr3 = 0; + /* root_pgd is ignored for direct MMUs. */ + vcpu->arch.mmu->root_pgd = 0; return 0; } static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) { - struct kvm_mmu_page *sp; u64 pdptr, pm_mask; - gfn_t root_gfn, root_cr3; + gfn_t root_gfn, root_pgd; + hpa_t root; int i; - root_cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu); - root_gfn = root_cr3 >> PAGE_SHIFT; + root_pgd = vcpu->arch.mmu->get_guest_pgd(vcpu); + root_gfn = root_pgd >> PAGE_SHIFT; if (mmu_check_root(vcpu, root_gfn)) return 1; @@ -3747,22 +3743,14 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * write-protect the guests page table root. 
*/ if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) { - hpa_t root = vcpu->arch.mmu->root_hpa; - - MMU_WARN_ON(VALID_PAGE(root)); + MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->root_hpa)); - spin_lock(&vcpu->kvm->mmu_lock); - if (make_mmu_pages_available(vcpu) < 0) { - spin_unlock(&vcpu->kvm->mmu_lock); + root = mmu_alloc_root(vcpu, root_gfn, 0, + vcpu->arch.mmu->shadow_root_level, false); + if (!VALID_PAGE(root)) return -ENOSPC; - } - sp = kvm_mmu_get_page(vcpu, root_gfn, 0, - vcpu->arch.mmu->shadow_root_level, 0, ACC_ALL); - root = __pa(sp->spt); - ++sp->root_count; - spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu->root_hpa = root; - goto set_root_cr3; + goto set_root_pgd; } /* @@ -3775,9 +3763,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; for (i = 0; i < 4; ++i) { - hpa_t root = vcpu->arch.mmu->pae_root[i]; - - MMU_WARN_ON(VALID_PAGE(root)); + MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->pae_root[i])); if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) { pdptr = vcpu->arch.mmu->get_pdptr(vcpu, i); if (!(pdptr & PT_PRESENT_MASK)) { @@ -3788,17 +3774,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) if (mmu_check_root(vcpu, root_gfn)) return 1; } - spin_lock(&vcpu->kvm->mmu_lock); - if (make_mmu_pages_available(vcpu) < 0) { - spin_unlock(&vcpu->kvm->mmu_lock); - return -ENOSPC; - } - sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, - 0, ACC_ALL); - root = __pa(sp->spt); - ++sp->root_count; - spin_unlock(&vcpu->kvm->mmu_lock); + root = mmu_alloc_root(vcpu, root_gfn, i << 30, + PT32_ROOT_LEVEL, false); + if (!VALID_PAGE(root)) + return -ENOSPC; vcpu->arch.mmu->pae_root[i] = root | pm_mask; } vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); @@ -3828,8 +3808,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root); } -set_root_cr3: - vcpu->arch.mmu->root_cr3 = root_cr3; +set_root_pgd: + vcpu->arch.mmu->root_pgd = root_pgd; return 0; } @@ -4083,18 +4063,16 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, bool *writable) { - struct kvm_memory_slot *slot; + struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); bool async; - /* - * Don't expose private memslots to L2. - */ - if (is_guest_mode(vcpu) && !kvm_is_visible_gfn(vcpu->kvm, gfn)) { + /* Don't expose private memslots to L2. */ + if (is_guest_mode(vcpu) && !kvm_is_visible_memslot(slot)) { *pfn = KVM_PFN_NOSLOT; + *writable = false; return false; } - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); async = false; *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable); if (!async) @@ -4135,7 +4113,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, return r; if (lpage_disallowed) - max_level = PT_PAGE_TABLE_LEVEL; + max_level = PG_LEVEL_4K; if (fast_page_fault(vcpu, gpa, error_code)) return RET_PF_RETRY; @@ -4171,7 +4149,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, /* This path builds a PAE pagetable, we can map 2mb pages at maximum. 
*/ return direct_page_fault(vcpu, gpa & PAGE_MASK, error_code, prefault, - PT_DIRECTORY_LEVEL, false); + PG_LEVEL_2M, false); } int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, @@ -4186,7 +4164,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, #endif vcpu->arch.l1tf_flush_l1d = true; - switch (vcpu->arch.apf.host_apf_reason) { + switch (vcpu->arch.apf.host_apf_flags) { default: trace_kvm_page_fault(fault_address, error_code); @@ -4196,13 +4174,13 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, insn_len); break; case KVM_PV_REASON_PAGE_NOT_PRESENT: - vcpu->arch.apf.host_apf_reason = 0; + vcpu->arch.apf.host_apf_flags = 0; local_irq_disable(); - kvm_async_pf_task_wait(fault_address, 0); + kvm_async_pf_task_wait_schedule(fault_address); local_irq_enable(); break; case KVM_PV_REASON_PAGE_READY: - vcpu->arch.apf.host_apf_reason = 0; + vcpu->arch.apf.host_apf_flags = 0; local_irq_disable(); kvm_async_pf_task_wake(fault_address); local_irq_enable(); @@ -4217,8 +4195,8 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, { int max_level; - for (max_level = PT_MAX_HUGEPAGE_LEVEL; - max_level > PT_PAGE_TABLE_LEVEL; + for (max_level = KVM_MAX_HUGEPAGE_LEVEL; + max_level > PG_LEVEL_4K; max_level--) { int page_num = KVM_PAGES_PER_HPAGE(max_level); gfn_t base = (gpa >> PAGE_SHIFT) & ~(page_num - 1); @@ -4237,7 +4215,7 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, context->page_fault = nonpaging_page_fault; context->gva_to_gpa = nonpaging_gva_to_gpa; context->sync_page = nonpaging_sync_page; - context->invlpg = nonpaging_invlpg; + context->invlpg = NULL; context->update_pte = nonpaging_update_pte; context->root_level = 0; context->shadow_root_level = PT32E_ROOT_LEVEL; @@ -4245,51 +4223,50 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, context->nx = false; } -static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t cr3, +static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd, union kvm_mmu_page_role role) { - return (role.direct || cr3 == root->cr3) && + return (role.direct || pgd == root->pgd) && VALID_PAGE(root->hpa) && page_header(root->hpa) && role.word == page_header(root->hpa)->role.word; } /* - * Find out if a previously cached root matching the new CR3/role is available. + * Find out if a previously cached root matching the new pgd/role is available. * The current root is also inserted into the cache. * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is * returned. * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and * false is returned. This root should now be freed by the caller. 
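The comment above describes the pgd cache lookup; the swap-based scan in cached_root_available() is subtle enough to be worth a standalone sketch. This is illustrative only, with simplified types, and the cache size of three previous roots is an assumption matching KVM_MMU_NUM_PREV_ROOTS.

#define NUM_PREV_ROOTS	3	/* assumed value of KVM_MMU_NUM_PREV_ROOTS */

struct root_info {
	unsigned long pgd;
	unsigned long hpa;
};

/*
 * The current root is pushed through the cache while the array is
 * scanned: on a hit the matching entry becomes the current root, on a
 * miss the oldest (LRU) entry is what falls out, and the caller is
 * expected to free it.
 */
static int find_cached_root(struct root_info *current_root,
			    struct root_info prev[NUM_PREV_ROOTS],
			    unsigned long new_pgd,
			    int (*usable)(const struct root_info *, unsigned long))
{
	struct root_info root = *current_root;
	int i;

	if (usable(&root, new_pgd))
		return 1;

	for (i = 0; i < NUM_PREV_ROOTS; i++) {
		struct root_info tmp = prev[i];

		prev[i] = root;		/* stash what we are carrying ...      */
		root = tmp;		/* ... and carry what was cached there */
		if (usable(&root, new_pgd))
			break;
	}

	*current_root = root;
	return i < NUM_PREV_ROOTS;
}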
*/ -static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3, +static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_pgd, union kvm_mmu_page_role new_role) { uint i; struct kvm_mmu_root_info root; struct kvm_mmu *mmu = vcpu->arch.mmu; - root.cr3 = mmu->root_cr3; + root.pgd = mmu->root_pgd; root.hpa = mmu->root_hpa; - if (is_root_usable(&root, new_cr3, new_role)) + if (is_root_usable(&root, new_pgd, new_role)) return true; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { swap(root, mmu->prev_roots[i]); - if (is_root_usable(&root, new_cr3, new_role)) + if (is_root_usable(&root, new_pgd, new_role)) break; } mmu->root_hpa = root.hpa; - mmu->root_cr3 = root.cr3; + mmu->root_pgd = root.pgd; return i < KVM_MMU_NUM_PREV_ROOTS; } -static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, - union kvm_mmu_page_role new_role, - bool skip_tlb_flush) +static bool fast_pgd_switch(struct kvm_vcpu *vcpu, gpa_t new_pgd, + union kvm_mmu_page_role new_role) { struct kvm_mmu *mmu = vcpu->arch.mmu; @@ -4299,70 +4276,59 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, * later if necessary. */ if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && - mmu->root_level >= PT64_ROOT_4LEVEL) { - if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT)) - return false; - - if (cached_root_available(vcpu, new_cr3, new_role)) { - /* - * It is possible that the cached previous root page is - * obsolete because of a change in the MMU generation - * number. However, changing the generation number is - * accompanied by KVM_REQ_MMU_RELOAD, which will free - * the root set here and allocate a new one. - */ - kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu); - if (!skip_tlb_flush) { - kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } - - /* - * The last MMIO access's GVA and GPA are cached in the - * VCPU. When switching to a new CR3, that GVA->GPA - * mapping may no longer be valid. So clear any cached - * MMIO info even when we don't need to sync the shadow - * page tables. - */ - vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); - - __clear_sp_write_flooding_count( - page_header(mmu->root_hpa)); - - return true; - } - } + mmu->root_level >= PT64_ROOT_4LEVEL) + return !mmu_check_root(vcpu, new_pgd >> PAGE_SHIFT) && + cached_root_available(vcpu, new_pgd, new_role); return false; } -static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, +static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, union kvm_mmu_page_role new_role, - bool skip_tlb_flush) + bool skip_tlb_flush, bool skip_mmu_sync) { - if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush)) - kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, - KVM_MMU_ROOT_CURRENT); + if (!fast_pgd_switch(vcpu, new_pgd, new_role)) { + kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, KVM_MMU_ROOT_CURRENT); + return; + } + + /* + * It's possible that the cached previous root page is obsolete because + * of a change in the MMU generation number. However, changing the + * generation number is accompanied by KVM_REQ_MMU_RELOAD, which will + * free the root set here and allocate a new one. + */ + kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu); + + if (!skip_mmu_sync || force_flush_and_sync_on_reuse) + kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); + if (!skip_tlb_flush || force_flush_and_sync_on_reuse) + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + + /* + * The last MMIO access's GVA and GPA are cached in the VCPU. When + * switching to a new CR3, that GVA->GPA mapping may no longer be + * valid. 
So clear any cached MMIO info even when we don't need to sync + * the shadow page tables. + */ + vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); + + __clear_sp_write_flooding_count(page_header(vcpu->arch.mmu->root_hpa)); } -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush) +void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush, + bool skip_mmu_sync) { - __kvm_mmu_new_cr3(vcpu, new_cr3, kvm_mmu_calc_root_page_role(vcpu), - skip_tlb_flush); + __kvm_mmu_new_pgd(vcpu, new_pgd, kvm_mmu_calc_root_page_role(vcpu), + skip_tlb_flush, skip_mmu_sync); } -EXPORT_SYMBOL_GPL(kvm_mmu_new_cr3); +EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd); static unsigned long get_cr3(struct kvm_vcpu *vcpu) { return kvm_read_cr3(vcpu); } -static void inject_page_fault(struct kvm_vcpu *vcpu, - struct x86_exception *fault) -{ - vcpu->arch.mmu->inject_page_fault(vcpu, fault); -} - static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, unsigned int access, int *nr_present) { @@ -4391,11 +4357,11 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, gpte &= level - mmu->last_nonleaf_level; /* - * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set - * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means - * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. + * PG_LEVEL_4K always terminates. The RHS has bit 7 set + * iff level <= PG_LEVEL_4K, which for our purpose means + * level == PG_LEVEL_4K; set PT_PAGE_SIZE_MASK in gpte then. */ - gpte |= level - PT_PAGE_TABLE_LEVEL - 1; + gpte |= level - PG_LEVEL_4K - 1; return gpte & PT_PAGE_SIZE_MASK; } @@ -4909,7 +4875,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only); role.base.ad_disabled = (shadow_accessed_mask == 0); - role.base.level = kvm_x86_ops.get_tdp_level(vcpu); + role.base.level = vcpu->arch.tdp_level; role.base.direct = true; role.base.gpte_is_8_bytes = true; @@ -4928,9 +4894,9 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->mmu_role.as_u64 = new_role.as_u64; context->page_fault = kvm_tdp_page_fault; context->sync_page = nonpaging_sync_page; - context->invlpg = nonpaging_invlpg; + context->invlpg = NULL; context->update_pte = nonpaging_update_pte; - context->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu); + context->shadow_root_level = vcpu->arch.tdp_level; context->direct_map = true; context->get_guest_pgd = get_cr3; context->get_pdptr = kvm_pdptr_read; @@ -4986,7 +4952,7 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) return role; } -void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) +void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer) { struct kvm_mmu *context = vcpu->arch.mmu; union kvm_mmu_role new_role = @@ -4995,11 +4961,11 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) if (new_role.as_u64 == context->mmu_role.as_u64) return; - if (!is_paging(vcpu)) + if (!(cr0 & X86_CR0_PG)) nonpaging_init_context(vcpu, context); - else if (is_long_mode(vcpu)) + else if (efer & EFER_LMA) paging64_init_context(vcpu, context); - else if (is_pae(vcpu)) + else if (cr4 & X86_CR4_PAE) paging32E_init_context(vcpu, context); else paging32_init_context(vcpu, context); @@ -5047,7 +5013,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty, execonly, level); - __kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false); + __kvm_mmu_new_pgd(vcpu, new_eptp, 
new_role.base, true, true); if (new_role.as_u64 == context->mmu_role.as_u64) return; @@ -5077,7 +5043,11 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu) { struct kvm_mmu *context = vcpu->arch.mmu; - kvm_init_shadow_mmu(vcpu); + kvm_init_shadow_mmu(vcpu, + kvm_read_cr0_bits(vcpu, X86_CR0_PG), + kvm_read_cr4_bits(vcpu, X86_CR4_PAE), + vcpu->arch.efer); + context->get_guest_pgd = get_cr3; context->get_pdptr = kvm_pdptr_read; context->inject_page_fault = kvm_inject_page_fault; @@ -5097,6 +5067,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) g_context->inject_page_fault = kvm_inject_page_fault; /* + * L2 page tables are never shadowed, so there is no need to sync + * SPTEs. + */ + g_context->invlpg = NULL; + + /* * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using * L1's nested page tables (e.g. EPT12). The nested translation * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using @@ -5183,7 +5159,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) if (r) goto out; kvm_mmu_load_pgd(vcpu); - kvm_x86_ops.tlb_flush(vcpu, true); + kvm_x86_ops.tlb_flush_current(vcpu); out: return r; } @@ -5202,7 +5178,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *new) { - if (sp->role.level != PT_PAGE_TABLE_LEVEL) { + if (sp->role.level != PG_LEVEL_4K) { ++vcpu->kvm->stat.mmu_pde_zapped; return; } @@ -5260,7 +5236,7 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp) * Skip write-flooding detected for the sp whose level is 1, because * it can become unsync, then the guest page is not write-protected. */ - if (sp->role.level == PT_PAGE_TABLE_LEVEL) + if (sp->role.level == PG_LEVEL_4K) return false; atomic_inc(&sp->write_flooding_count); @@ -5497,37 +5473,54 @@ emulate: } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); -void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) +void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gva_t gva, hpa_t root_hpa) { - struct kvm_mmu *mmu = vcpu->arch.mmu; int i; - /* INVLPG on a * non-canonical address is a NOP according to the SDM. */ - if (is_noncanonical_address(gva, vcpu)) + /* It's actually a GPA for vcpu->arch.guest_mmu. */ + if (mmu != &vcpu->arch.guest_mmu) { + /* INVLPG on a non-canonical address is a NOP according to the SDM. */ + if (is_noncanonical_address(gva, vcpu)) + return; + + kvm_x86_ops.tlb_flush_gva(vcpu, gva); + } + + if (!mmu->invlpg) return; - mmu->invlpg(vcpu, gva, mmu->root_hpa); + if (root_hpa == INVALID_PAGE) { + mmu->invlpg(vcpu, gva, mmu->root_hpa); - /* - * INVLPG is required to invalidate any global mappings for the VA, - * irrespective of PCID. Since it would take us roughly similar amount - * of work to determine whether any of the prev_root mappings of the VA - * is marked global, or to just sync it blindly, so we might as well - * just always sync it. - * - * Mappings not reachable via the current cr3 or the prev_roots will be - * synced when switching to that cr3, so nothing needs to be done here - * for them. - */ - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - if (VALID_PAGE(mmu->prev_roots[i].hpa)) - mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); + /* + * INVLPG is required to invalidate any global mappings for the VA, + * irrespective of PCID. Since it would take us roughly similar amount + * of work to determine whether any of the prev_root mappings of the VA + * is marked global, or to just sync it blindly, so we might as well + * just always sync it. 
+ * + * Mappings not reachable via the current cr3 or the prev_roots will be + * synced when switching to that cr3, so nothing needs to be done here + * for them. + */ + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if (VALID_PAGE(mmu->prev_roots[i].hpa)) + mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); + } else { + mmu->invlpg(vcpu, gva, root_hpa); + } +} +EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_gva); - kvm_x86_ops.tlb_flush_gva(vcpu, gva); +void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) +{ + kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE); ++vcpu->stat.invlpg; } EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); + void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) { struct kvm_mmu *mmu = vcpu->arch.mmu; @@ -5541,7 +5534,7 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { if (VALID_PAGE(mmu->prev_roots[i].hpa) && - pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) { + pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) { mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); tlb_flush = true; } @@ -5574,9 +5567,9 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_page_level) if (tdp_enabled) max_page_level = tdp_page_level; else if (boot_cpu_has(X86_FEATURE_GBPAGES)) - max_page_level = PT_PDPE_LEVEL; + max_page_level = PG_LEVEL_1G; else - max_page_level = PT_DIRECTORY_LEVEL; + max_page_level = PG_LEVEL_2M; } EXPORT_SYMBOL_GPL(kvm_configure_mmu); @@ -5632,24 +5625,24 @@ static __always_inline bool slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { - return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, - PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); + return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K, + KVM_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); } static __always_inline bool slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { - return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1, - PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); + return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K + 1, + KVM_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); } static __always_inline bool slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { - return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, - PT_PAGE_TABLE_LEVEL, lock_flush_tlb); + return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K, + PG_LEVEL_4K, lock_flush_tlb); } static void free_mmu_pages(struct kvm_mmu *mmu) @@ -5672,7 +5665,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) * SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can * skip allocating the PDP table. 
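kvm_configure_mmu() above now spells the host mapping limit with the generic level names. The helper below is a hedged restatement for illustration, not kernel code; the enum values are assumptions consistent with the rename noted earlier in this file's hunks.

enum { PG_LEVEL_4K = 1, PG_LEVEL_2M, PG_LEVEL_1G };

static int pick_max_page_level(int tdp_enabled, int tdp_page_level,
			       int cpu_has_gbpages)
{
	if (tdp_enabled)
		return tdp_page_level;	/* limit requested by the EPT/NPT code */
	if (cpu_has_gbpages)
		return PG_LEVEL_1G;	/* shadow paging, host supports 1GiB pages */
	return PG_LEVEL_2M;		/* shadow paging fallback */
}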
*/ - if (tdp_enabled && kvm_x86_ops.get_tdp_level(vcpu) > PT32E_ROOT_LEVEL) + if (tdp_enabled && vcpu->arch.tdp_level > PT32E_ROOT_LEVEL) return 0; page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32); @@ -5695,13 +5688,13 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; vcpu->arch.root_mmu.root_hpa = INVALID_PAGE; - vcpu->arch.root_mmu.root_cr3 = 0; + vcpu->arch.root_mmu.root_pgd = 0; vcpu->arch.root_mmu.translate_gpa = translate_gpa; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE; - vcpu->arch.guest_mmu.root_cr3 = 0; + vcpu->arch.guest_mmu.root_pgd = 0; vcpu->arch.guest_mmu.translate_gpa = translate_gpa; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; @@ -5859,7 +5852,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) continue; slot_handle_level_range(kvm, memslot, kvm_zap_rmapp, - PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL, + PG_LEVEL_4K, + KVM_MAX_HUGEPAGE_LEVEL, start, end - 1, true); } } @@ -5881,7 +5875,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, spin_lock(&kvm->mmu_lock); flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect, - start_level, PT_MAX_HUGEPAGE_LEVEL, false); + start_level, KVM_MAX_HUGEPAGE_LEVEL, false); spin_unlock(&kvm->mmu_lock); /* @@ -6142,27 +6136,18 @@ static void kvm_set_mmio_spte_mask(void) u64 mask; /* - * Set the reserved bits and the present bit of an paging-structure - * entry to generate page fault with PFER.RSV = 1. + * Set a reserved PA bit in MMIO SPTEs to generate page faults with + * PFEC.RSVD=1 on MMIO accesses. 64-bit PTEs (PAE, x86-64, and EPT + * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports + * 52-bit physical addresses then there are no reserved PA bits in the + * PTEs and so the reserved PA approach must be disabled. */ + if (shadow_phys_bits < 52) + mask = BIT_ULL(51) | PT_PRESENT_MASK; + else + mask = 0; - /* - * Mask the uppermost physical address bit, which would be reserved as - * long as the supported physical address width is less than 52. - */ - mask = 1ull << 51; - - /* Set the present bit. */ - mask |= 1ull; - - /* - * If reserved bit is not supported, clear the present bit to disable - * mmio page fault. 
- */ - if (shadow_phys_bits == 52) - mask &= ~1ull; - - kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); + kvm_mmu_set_mmio_spte_mask(mask, ACC_WRITE_MASK | ACC_USER_MASK); } static bool get_nx_auto_mode(void) diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index ddc1ec3bdacd..a7bcde34d1f2 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -61,7 +61,7 @@ static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn, { int index, val; - index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); + index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); val = slot->arch.gfn_track[mode][index]; @@ -151,7 +151,7 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, if (!slot) return false; - index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); + index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); return !!READ_ONCE(slot->arch.gfn_track[mode][index]); } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 9bdf9b7d9a96..38c576495048 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -75,7 +75,7 @@ #define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT) #define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl) -#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL) +#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PG_LEVEL_4K) /* * The guest_walker structure emulates the behavior of the hardware page @@ -198,7 +198,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, !(gpte & PT_GUEST_ACCESSED_MASK)) goto no_present; - if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) + if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PG_LEVEL_4K)) goto no_present; return false; @@ -436,7 +436,7 @@ retry_walk: gfn = gpte_to_gfn_lvl(pte, walker->level); gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT; - if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) + if (PTTYPE == 32 && walker->level > PG_LEVEL_4K && is_cpuid_PSE36()) gfn += pse36_gfn_delta(pte); real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault); @@ -552,7 +552,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * we call mmu_set_spte() with host_writable = true because * pte_prefetch_gfn_to_pfn always gets a writable pfn. 
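The kvm_set_mmio_spte_mask() rewrite above boils down to one branch. A minimal standalone sketch of that selection, not the patch itself:

#include <stdint.h>

#define BIT_ULL(n)	(1ULL << (n))
#define PT_PRESENT_MASK	BIT_ULL(0)

/*
 * MMIO SPTEs set a reserved physical-address bit (bit 51) plus the
 * present bit so guest accesses fault with PFEC.RSVD = 1.  If the CPU
 * really implements 52 physical address bits, bit 51 is not reserved
 * and the trick has to be disabled entirely (mask = 0).
 */
static uint64_t mmio_spte_mask(int shadow_phys_bits)
{
	if (shadow_phys_bits < 52)
		return BIT_ULL(51) | PT_PRESENT_MASK;
	return 0;
}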
*/ - mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, + mmu_set_spte(vcpu, spte, pte_access, 0, PG_LEVEL_4K, gfn, pfn, true, true); kvm_release_pfn_clean(pfn); @@ -575,7 +575,7 @@ static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, u64 mask; int r, index; - if (level == PT_PAGE_TABLE_LEVEL) { + if (level == PG_LEVEL_4K) { mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1; base_gpa = pte_gpa & ~mask; index = (pte_gpa - base_gpa) / sizeof(pt_element_t); @@ -600,7 +600,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, sp = page_header(__pa(sptep)); - if (sp->role.level > PT_PAGE_TABLE_LEVEL) + if (sp->role.level > PG_LEVEL_4K) return; if (sp->role.direct) @@ -812,7 +812,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, if (!r) { pgprintk("%s: guest page fault\n", __func__); if (!prefault) - inject_page_fault(vcpu, &walker.fault); + kvm_inject_emulated_page_fault(vcpu, &walker.fault); return RET_PF_RETRY; } @@ -828,7 +828,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); if (lpage_disallowed || is_self_change_mapping) - max_level = PT_PAGE_TABLE_LEVEL; + max_level = PG_LEVEL_4K; else max_level = walker.level; @@ -884,7 +884,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) { int offset = 0; - WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL); + WARN_ON(sp->role.level != PG_LEVEL_4K); if (PTTYPE == 32) offset = sp->role.quadrant << PT64_LEVEL_BITS; @@ -1070,7 +1070,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; set_spte_ret |= set_spte(vcpu, &sp->spt[i], - pte_access, PT_PAGE_TABLE_LEVEL, + pte_access, PG_LEVEL_4K, gfn, spte_to_pfn(sp->spt[i]), true, false, host_writable); } diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index ca39f62aabc6..9d2844f87f6d 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -100,7 +100,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) sp = page_header(__pa(sptep)); if (sp->unsync) { - if (level != PT_PAGE_TABLE_LEVEL) { + if (level != PG_LEVEL_4K) { audit_printk(vcpu->kvm, "unsync sp: %p " "level = %d\n", sp, level); return; @@ -176,7 +176,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) { int i; - if (sp->role.level != PT_PAGE_TABLE_LEVEL) + if (sp->role.level != PG_LEVEL_4K) return; for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { @@ -200,7 +200,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, sp->gfn); - rmap_head = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); + rmap_head = __gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot); for_each_rmap_spte(rmap_head, &iter, sptep) { if (is_writable_pte(*sptep)) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index a5078841bdac..b86346903f2e 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -397,9 +397,9 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr) __set_bit(pmc->idx, pmu->pmc_in_use); } -int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) +int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { - return kvm_x86_ops.pmu_ops->get_msr(vcpu, msr, data); + return kvm_x86_ops.pmu_ops->get_msr(vcpu, msr_info); } int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data 
*msr_info) diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index a6c78a797cb1..ab85eed8a6cc 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -32,7 +32,7 @@ struct kvm_pmu_ops { struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr); int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx); bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); - int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data); + int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info); void (*refresh)(struct kvm_vcpu *vcpu); void (*init)(struct kvm_vcpu *vcpu); @@ -147,7 +147,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx); bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr); -int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); +int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); void kvm_pmu_refresh(struct kvm_vcpu *vcpu); void kvm_pmu_reset(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 9a2a62e5afeb..8a6db11dcb43 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -25,6 +25,8 @@ #include "trace.h" #include "mmu.h" #include "x86.h" +#include "cpuid.h" +#include "lapic.h" #include "svm.h" static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, @@ -59,7 +61,7 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) { struct vcpu_svm *svm = to_svm(vcpu); - u64 cr3 = svm->nested.nested_cr3; + u64 cr3 = svm->nested.ctl.nested_cr3; u64 pdpte; int ret; @@ -74,19 +76,22 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - return svm->nested.nested_cr3; + return svm->nested.ctl.nested_cr3; } static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *hsave = svm->nested.hsave; + WARN_ON(mmu_is_nested(vcpu)); vcpu->arch.mmu = &vcpu->arch.guest_mmu; - kvm_init_shadow_mmu(vcpu); + kvm_init_shadow_mmu(vcpu, X86_CR0_PG, hsave->save.cr4, hsave->save.efer); vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3; vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit; - vcpu->arch.mmu->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu); + vcpu->arch.mmu->shadow_root_level = vcpu->arch.tdp_level; reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; } @@ -99,8 +104,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) void recalc_intercepts(struct vcpu_svm *svm) { - struct vmcb_control_area *c, *h; - struct nested_state *g; + struct vmcb_control_area *c, *h, *g; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); @@ -109,14 +113,16 @@ void recalc_intercepts(struct vcpu_svm *svm) c = &svm->vmcb->control; h = &svm->nested.hsave->control; - g = &svm->nested; + g = &svm->nested.ctl; + + svm->nested.host_intercept_exceptions = h->intercept_exceptions; c->intercept_cr = h->intercept_cr; c->intercept_dr = h->intercept_dr; c->intercept_exceptions = h->intercept_exceptions; c->intercept = h->intercept; - if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { + if (g->int_ctl & V_INTR_MASKING_MASK) { /* We only want 
the cr8 intercept bits of L1 */ c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ); c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE); @@ -138,11 +144,9 @@ void recalc_intercepts(struct vcpu_svm *svm) c->intercept |= g->intercept; } -static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb) +static void copy_vmcb_control_area(struct vmcb_control_area *dst, + struct vmcb_control_area *from) { - struct vmcb_control_area *dst = &dst_vmcb->control; - struct vmcb_control_area *from = &from_vmcb->control; - dst->intercept_cr = from->intercept_cr; dst->intercept_dr = from->intercept_dr; dst->intercept_exceptions = from->intercept_exceptions; @@ -150,7 +154,7 @@ static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb dst->iopm_base_pa = from->iopm_base_pa; dst->msrpm_base_pa = from->msrpm_base_pa; dst->tsc_offset = from->tsc_offset; - dst->asid = from->asid; + /* asid not copied, it is handled manually for svm->vmcb. */ dst->tlb_ctl = from->tlb_ctl; dst->int_ctl = from->int_ctl; dst->int_vector = from->int_vector; @@ -179,7 +183,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) */ int i; - if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) + if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_MSR_PROT))) return true; for (i = 0; i < MSRPM_OFFSETS; i++) { @@ -190,7 +194,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) break; p = msrpm_offsets[i]; - offset = svm->nested.vmcb_msrpm + (p * 4); + offset = svm->nested.ctl.msrpm_base_pa + (p * 4); if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4)) return false; @@ -203,41 +207,111 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) return true; } -static bool nested_vmcb_checks(struct vmcb *vmcb) +static bool nested_vmcb_check_controls(struct vmcb_control_area *control) { - if ((vmcb->save.efer & EFER_SVME) == 0) + if ((control->intercept & (1ULL << INTERCEPT_VMRUN)) == 0) return false; - if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0) + if (control->asid == 0) return false; - if (vmcb->control.asid == 0) - return false; - - if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && + if ((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled) return false; return true; } -void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, - struct vmcb *nested_vmcb, struct kvm_host_map *map) +static bool nested_vmcb_checks(struct vmcb *vmcb) { - bool evaluate_pending_interrupts = - is_intercept(svm, INTERCEPT_VINTR) || - is_intercept(svm, INTERCEPT_IRET); + if ((vmcb->save.efer & EFER_SVME) == 0) + return false; - if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) - svm->vcpu.arch.hflags |= HF_HIF_MASK; - else - svm->vcpu.arch.hflags &= ~HF_HIF_MASK; + if (((vmcb->save.cr0 & X86_CR0_CD) == 0) && + (vmcb->save.cr0 & X86_CR0_NW)) + return false; - if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) { - svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3; - nested_svm_init_mmu_context(&svm->vcpu); + return nested_vmcb_check_controls(&vmcb->control); +} + +static void load_nested_vmcb_control(struct vcpu_svm *svm, + struct vmcb_control_area *control) +{ + copy_vmcb_control_area(&svm->nested.ctl, control); + + /* Copy it here because nested_svm_check_controls will check it. */ + svm->nested.ctl.asid = control->asid; + svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL; + svm->nested.ctl.iopm_base_pa &= ~0x0fffULL; +} + +/* + * Synchronize fields that are written by the processor, so that + * they can be copied back into the nested_vmcb. 
+ */ +void sync_nested_vmcb_control(struct vcpu_svm *svm) +{ + u32 mask; + svm->nested.ctl.event_inj = svm->vmcb->control.event_inj; + svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err; + + /* Only a few fields of int_ctl are written by the processor. */ + mask = V_IRQ_MASK | V_TPR_MASK; + if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) && + is_intercept(svm, SVM_EXIT_VINTR)) { + /* + * In order to request an interrupt window, L0 is usurping + * svm->vmcb->control.int_ctl and possibly setting V_IRQ + * even if it was clear in L1's VMCB. Restoring it would be + * wrong. However, in this case V_IRQ will remain true until + * interrupt_window_interception calls svm_clear_vintr and + * restores int_ctl. We can just leave it aside. + */ + mask &= ~V_IRQ_MASK; } + svm->nested.ctl.int_ctl &= ~mask; + svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask; +} + +/* + * Transfer any event that L0 or L1 wanted to inject into L2 to + * EXIT_INT_INFO. + */ +static void nested_vmcb_save_pending_event(struct vcpu_svm *svm, + struct vmcb *nested_vmcb) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + u32 exit_int_info = 0; + unsigned int nr; + + if (vcpu->arch.exception.injected) { + nr = vcpu->arch.exception.nr; + exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT; + + if (vcpu->arch.exception.has_error_code) { + exit_int_info |= SVM_EVTINJ_VALID_ERR; + nested_vmcb->control.exit_int_info_err = + vcpu->arch.exception.error_code; + } + + } else if (vcpu->arch.nmi_injected) { + exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; + } else if (vcpu->arch.interrupt.injected) { + nr = vcpu->arch.interrupt.nr; + exit_int_info = nr | SVM_EVTINJ_VALID; + + if (vcpu->arch.interrupt.soft) + exit_int_info |= SVM_EVTINJ_TYPE_SOFT; + else + exit_int_info |= SVM_EVTINJ_TYPE_INTR; + } + + nested_vmcb->control.exit_int_info = exit_int_info; +} + +static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb) +{ /* Load the nested guest state */ svm->vmcb->save.es = nested_vmcb->save.es; svm->vmcb->save.cs = nested_vmcb->save.cs; @@ -249,14 +323,7 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); - if (npt_enabled) { - svm->vmcb->save.cr3 = nested_vmcb->save.cr3; - svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; - } else - (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); - - /* Guest paging mode is active - reset mmu */ - kvm_mmu_reset_context(&svm->vcpu); + (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax); @@ -270,38 +337,34 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, svm->vmcb->save.dr7 = nested_vmcb->save.dr7; svm->vcpu.arch.dr6 = nested_vmcb->save.dr6; svm->vmcb->save.cpl = nested_vmcb->save.cpl; +} - svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; - svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; +static void nested_prepare_vmcb_control(struct vcpu_svm *svm) +{ + const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK; + if (svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) + nested_svm_init_mmu_context(&svm->vcpu); - /* cache intercepts */ - svm->nested.intercept_cr = nested_vmcb->control.intercept_cr; - svm->nested.intercept_dr = nested_vmcb->control.intercept_dr; - 
svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; - svm->nested.intercept = nested_vmcb->control.intercept; + /* Guest paging mode is active - reset mmu */ + kvm_mmu_reset_context(&svm->vcpu); - svm_flush_tlb(&svm->vcpu, true); - svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; - if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) - svm->vcpu.arch.hflags |= HF_VINTR_MASK; - else - svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; + svm_flush_tlb(&svm->vcpu); - svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset; - svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset; + svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset = + svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset; - svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext; - svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; - svm->vmcb->control.int_state = nested_vmcb->control.int_state; - svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; - svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; + svm->vmcb->control.int_ctl = + (svm->nested.ctl.int_ctl & ~mask) | + (svm->nested.hsave->control.int_ctl & mask); - svm->vmcb->control.pause_filter_count = - nested_vmcb->control.pause_filter_count; - svm->vmcb->control.pause_filter_thresh = - nested_vmcb->control.pause_filter_thresh; + svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext; + svm->vmcb->control.int_vector = svm->nested.ctl.int_vector; + svm->vmcb->control.int_state = svm->nested.ctl.int_state; + svm->vmcb->control.event_inj = svm->nested.ctl.event_inj; + svm->vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err; - kvm_vcpu_unmap(&svm->vcpu, map, true); + svm->vmcb->control.pause_filter_count = svm->nested.ctl.pause_filter_count; + svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh; /* Enter Guest-Mode */ enter_guest_mode(&svm->vcpu); @@ -312,25 +375,18 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, */ recalc_intercepts(svm); - svm->nested.vmcb = vmcb_gpa; + mark_all_dirty(svm->vmcb); +} - /* - * If L1 had a pending IRQ/NMI before executing VMRUN, - * which wasn't delivered because it was disallowed (e.g. - * interrupts disabled), L0 needs to evaluate if this pending - * event should cause an exit from L2 to L1 or be delivered - * directly to L2. - * - * Usually this would be handled by the processor noticing an - * IRQ/NMI window request. However, VMRUN can unblock interrupts - * by implicitly setting GIF, so force L0 to perform pending event - * evaluation by requesting a KVM_REQ_EVENT. 
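nested_prepare_vmcb_control() above merges L1's int_ctl with the bits KVM owns while L2 runs. A hedged sketch of just that merge; the bit positions are assumptions from the SVM architecture, not taken from this patch.

#define V_GIF_MASK		(1U << 9)	/* assumed bit positions */
#define V_INTR_MASKING_MASK	(1U << 24)
#define V_GIF_ENABLE_MASK	(1U << 25)

/*
 * Guest-controlled bits come from the VMCB12 control area that L1 wrote;
 * the virtual-interrupt-masking and vGIF-enable bits come from the host
 * state KVM saved at VMRUN (hsave), since those stay under host control
 * while L2 is running.
 */
static unsigned int merge_int_ctl(unsigned int vmcb12_int_ctl,
				  unsigned int hsave_int_ctl)
{
	const unsigned int host_owned = V_INTR_MASKING_MASK |
					V_GIF_ENABLE_MASK | V_GIF_MASK;

	return (vmcb12_int_ctl & ~host_owned) | (hsave_int_ctl & host_owned);
}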
- */ - enable_gif(svm); - if (unlikely(evaluate_pending_interrupts)) - kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); +void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, + struct vmcb *nested_vmcb) +{ + svm->nested.vmcb = vmcb_gpa; + load_nested_vmcb_control(svm, &nested_vmcb->control); + nested_prepare_vmcb_save(svm, nested_vmcb); + nested_prepare_vmcb_control(svm); - mark_all_dirty(svm->vmcb); + svm_set_gif(svm, true); } int nested_svm_vmrun(struct vcpu_svm *svm) @@ -342,8 +398,12 @@ int nested_svm_vmrun(struct vcpu_svm *svm) struct kvm_host_map map; u64 vmcb_gpa; - vmcb_gpa = svm->vmcb->save.rax; + if (is_smm(&svm->vcpu)) { + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; + } + vmcb_gpa = svm->vmcb->save.rax; ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map); if (ret == -EINVAL) { kvm_inject_gp(&svm->vcpu, 0); @@ -361,10 +421,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm) nested_vmcb->control.exit_code_hi = 0; nested_vmcb->control.exit_info_1 = 0; nested_vmcb->control.exit_info_2 = 0; - - kvm_vcpu_unmap(&svm->vcpu, &map, true); - - return ret; + goto out; } trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa, @@ -404,9 +461,10 @@ int nested_svm_vmrun(struct vcpu_svm *svm) else hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); - copy_vmcb_control_area(hsave, vmcb); + copy_vmcb_control_area(&hsave->control, &vmcb->control); - enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map); + svm->nested.nested_run_pending = 1; + enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb); if (!nested_svm_vmrun_msrpm(svm)) { svm->vmcb->control.exit_code = SVM_EXIT_ERR; @@ -417,6 +475,9 @@ int nested_svm_vmrun(struct vcpu_svm *svm) nested_svm_vmexit(svm); } +out: + kvm_vcpu_unmap(&svm->vcpu, &map, true); + return ret; } @@ -444,13 +505,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) struct vmcb *vmcb = svm->vmcb; struct kvm_host_map map; - trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, - vmcb->control.exit_info_1, - vmcb->control.exit_info_2, - vmcb->control.exit_int_info, - vmcb->control.exit_int_info_err, - KVM_ISA_SVM); - rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map); if (rc) { if (rc == -EINVAL) @@ -463,9 +517,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm) /* Exit Guest-Mode */ leave_guest_mode(&svm->vcpu); svm->nested.vmcb = 0; + WARN_ON_ONCE(svm->nested.nested_run_pending); + + /* in case we halted in L2 */ + svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; /* Give the current vmcb to the guest */ - disable_gif(svm); + svm_set_gif(svm, false); nested_vmcb->save.es = vmcb->save.es; nested_vmcb->save.cs = vmcb->save.cs; @@ -479,62 +537,42 @@ int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.cr2 = vmcb->save.cr2; nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); - nested_vmcb->save.rip = vmcb->save.rip; - nested_vmcb->save.rsp = vmcb->save.rsp; - nested_vmcb->save.rax = vmcb->save.rax; + nested_vmcb->save.rip = kvm_rip_read(&svm->vcpu); + nested_vmcb->save.rsp = kvm_rsp_read(&svm->vcpu); + nested_vmcb->save.rax = kvm_rax_read(&svm->vcpu); nested_vmcb->save.dr7 = vmcb->save.dr7; nested_vmcb->save.dr6 = svm->vcpu.arch.dr6; nested_vmcb->save.cpl = vmcb->save.cpl; - nested_vmcb->control.int_ctl = vmcb->control.int_ctl; - nested_vmcb->control.int_vector = vmcb->control.int_vector; nested_vmcb->control.int_state = vmcb->control.int_state; nested_vmcb->control.exit_code = vmcb->control.exit_code; nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi; nested_vmcb->control.exit_info_1 = 
vmcb->control.exit_info_1; nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; - nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; - nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; + + if (nested_vmcb->control.exit_code != SVM_EXIT_ERR) + nested_vmcb_save_pending_event(svm, nested_vmcb); if (svm->nrips_enabled) nested_vmcb->control.next_rip = vmcb->control.next_rip; - /* - * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have - * to make sure that we do not lose injected events. So check event_inj - * here and copy it to exit_int_info if it is valid. - * Exit_int_info and event_inj can't be both valid because the case - * below only happens on a VMRUN instruction intercept which has - * no valid exit_int_info set. - */ - if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { - struct vmcb_control_area *nc = &nested_vmcb->control; - - nc->exit_int_info = vmcb->control.event_inj; - nc->exit_int_info_err = vmcb->control.event_inj_err; - } - - nested_vmcb->control.tlb_ctl = 0; - nested_vmcb->control.event_inj = 0; - nested_vmcb->control.event_inj_err = 0; + nested_vmcb->control.int_ctl = svm->nested.ctl.int_ctl; + nested_vmcb->control.tlb_ctl = svm->nested.ctl.tlb_ctl; + nested_vmcb->control.event_inj = svm->nested.ctl.event_inj; + nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err; nested_vmcb->control.pause_filter_count = svm->vmcb->control.pause_filter_count; nested_vmcb->control.pause_filter_thresh = svm->vmcb->control.pause_filter_thresh; - /* We always set V_INTR_MASKING and remember the old value in hflags */ - if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) - nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; - /* Restore the original control entries */ - copy_vmcb_control_area(vmcb, hsave); + copy_vmcb_control_area(&vmcb->control, &hsave->control); - svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset; - kvm_clear_exception_queue(&svm->vcpu); - kvm_clear_interrupt_queue(&svm->vcpu); + svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset = + svm->vcpu.arch.l1_tsc_offset; - svm->nested.nested_cr3 = 0; + svm->nested.ctl.nested_cr3 = 0; /* Restore selected save entries */ svm->vmcb->save.es = hsave->save.es; @@ -562,6 +600,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm) mark_all_dirty(svm->vmcb); + trace_kvm_nested_vmexit_inject(nested_vmcb->control.exit_code, + nested_vmcb->control.exit_info_1, + nested_vmcb->control.exit_info_2, + nested_vmcb->control.exit_int_info, + nested_vmcb->control.exit_int_info_err, + KVM_ISA_SVM); + kvm_vcpu_unmap(&svm->vcpu, &map, true); nested_svm_uninit_mmu_context(&svm->vcpu); @@ -579,12 +624,28 @@ int nested_svm_vmexit(struct vcpu_svm *svm) return 0; } +/* + * Forcibly leave nested mode in order to be able to reset the VCPU later on. 
+ */ +void svm_leave_nested(struct vcpu_svm *svm) +{ + if (is_guest_mode(&svm->vcpu)) { + struct vmcb *hsave = svm->nested.hsave; + struct vmcb *vmcb = svm->vmcb; + + svm->nested.nested_run_pending = 0; + leave_guest_mode(&svm->vcpu); + copy_vmcb_control_area(&vmcb->control, &hsave->control); + nested_svm_uninit_mmu_context(&svm->vcpu); + } +} + static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) { u32 offset, msr, value; int write, mask; - if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) + if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_MSR_PROT))) return NESTED_EXIT_HOST; msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; @@ -598,56 +659,12 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) /* Offset is in 32 bit units but need in 8 bit units */ offset *= 4; - if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4)) + if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4)) return NESTED_EXIT_DONE; return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; } -/* DB exceptions for our internal use must not cause vmexit */ -static int nested_svm_intercept_db(struct vcpu_svm *svm) -{ - unsigned long dr6 = svm->vmcb->save.dr6; - - /* Always catch it and pass it to userspace if debugging. */ - if (svm->vcpu.guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - return NESTED_EXIT_HOST; - - /* if we're not singlestepping, it's not ours */ - if (!svm->nmi_singlestep) - goto reflected_db; - - /* if it's not a singlestep exception, it's not ours */ - if (!(dr6 & DR6_BS)) - goto reflected_db; - - /* if the guest is singlestepping, it should get the vmexit */ - if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) { - disable_nmi_singlestep(svm); - goto reflected_db; - } - - /* it's ours, the nested hypervisor must not see this one */ - return NESTED_EXIT_HOST; - -reflected_db: - /* - * Synchronize guest DR6 here just like in kvm_deliver_exception_payload; - * it will be moved into the nested VMCB by nested_svm_vmexit. Once - * exceptions will be moved to svm_check_nested_events, all this stuff - * will just go away and we could just return NESTED_EXIT_HOST - * unconditionally. db_interception will queue the exception, which - * will be processed by svm_check_nested_events if a nested vmexit is - * required, and we will just use kvm_deliver_exception_payload to copy - * the payload to DR6 before vmexit. - */ - WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT); - svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM); - svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1; - return NESTED_EXIT_DONE; -} - static int nested_svm_intercept_ioio(struct vcpu_svm *svm) { unsigned port, size, iopm_len; @@ -655,13 +672,13 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm) u8 start_bit; u64 gpa; - if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) + if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_IOIO_PROT))) return NESTED_EXIT_HOST; port = svm->vmcb->control.exit_info_1 >> 16; size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; - gpa = svm->nested.vmcb_iopm + (port / 8); + gpa = svm->nested.ctl.iopm_base_pa + (port / 8); start_bit = port % 8; iopm_len = (start_bit + size > 8) ? 2 : 1; mask = (0xf >> (4 - size)) << start_bit; @@ -687,31 +704,23 @@ static int nested_svm_intercept(struct vcpu_svm *svm) break; case SVM_EXIT_READ_CR0 ... 
SVM_EXIT_WRITE_CR8: { u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0); - if (svm->nested.intercept_cr & bit) + if (svm->nested.ctl.intercept_cr & bit) vmexit = NESTED_EXIT_DONE; break; } case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0); - if (svm->nested.intercept_dr & bit) + if (svm->nested.ctl.intercept_dr & bit) vmexit = NESTED_EXIT_DONE; break; } case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { - u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); - if (svm->nested.intercept_exceptions & excp_bits) { - if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR) - vmexit = nested_svm_intercept_db(svm); - else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR && - svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP) - vmexit = NESTED_EXIT_HOST; - else - vmexit = NESTED_EXIT_DONE; - } - /* async page fault always cause vmexit */ - else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && - svm->vcpu.arch.exception.nested_apf != 0) - vmexit = NESTED_EXIT_DONE; + /* + * Host-intercepted exceptions have been checked already in + * nested_svm_exit_special. There is nothing to do here, + * the vmexit is injected by svm_check_nested_events. + */ + vmexit = NESTED_EXIT_DONE; break; } case SVM_EXIT_ERR: { @@ -720,7 +729,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm) } default: { u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); - if (svm->nested.intercept & exit_bits) + if (svm->nested.ctl.intercept & exit_bits) vmexit = NESTED_EXIT_DONE; } } @@ -756,62 +765,140 @@ int nested_svm_check_permissions(struct vcpu_svm *svm) return 0; } -int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, - bool has_error_code, u32 error_code) +static bool nested_exit_on_exception(struct vcpu_svm *svm) { - int vmexit; + unsigned int nr = svm->vcpu.arch.exception.nr; - if (!is_guest_mode(&svm->vcpu)) - return 0; + return (svm->nested.ctl.intercept_exceptions & (1 << nr)); +} - vmexit = nested_svm_intercept(svm); - if (vmexit != NESTED_EXIT_DONE) - return 0; +static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm) +{ + unsigned int nr = svm->vcpu.arch.exception.nr; svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; svm->vmcb->control.exit_code_hi = 0; - svm->vmcb->control.exit_info_1 = error_code; + + if (svm->vcpu.arch.exception.has_error_code) + svm->vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code; /* * EXITINFO2 is undefined for all exception intercepts other * than #PF. */ - if (svm->vcpu.arch.exception.nested_apf) - svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; - else if (svm->vcpu.arch.exception.has_payload) - svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload; - else - svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; + if (nr == PF_VECTOR) { + if (svm->vcpu.arch.exception.nested_apf) + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; + else if (svm->vcpu.arch.exception.has_payload) + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload; + else + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; + } else if (nr == DB_VECTOR) { + /* See inject_pending_event. 
*/ + kvm_deliver_exception_payload(&svm->vcpu); + if (svm->vcpu.arch.dr7 & DR7_GD) { + svm->vcpu.arch.dr7 &= ~DR7_GD; + kvm_update_dr7(&svm->vcpu); + } + } else + WARN_ON(svm->vcpu.arch.exception.has_payload); - svm->nested.exit_required = true; - return vmexit; + nested_svm_vmexit(svm); +} + +static void nested_svm_smi(struct vcpu_svm *svm) +{ + svm->vmcb->control.exit_code = SVM_EXIT_SMI; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; + + nested_svm_vmexit(svm); +} + +static void nested_svm_nmi(struct vcpu_svm *svm) +{ + svm->vmcb->control.exit_code = SVM_EXIT_NMI; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; + + nested_svm_vmexit(svm); } static void nested_svm_intr(struct vcpu_svm *svm) { + trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); + svm->vmcb->control.exit_code = SVM_EXIT_INTR; svm->vmcb->control.exit_info_1 = 0; svm->vmcb->control.exit_info_2 = 0; - /* nested_svm_vmexit this gets called afterwards from handle_exit */ - svm->nested.exit_required = true; - trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); + nested_svm_vmexit(svm); } -static bool nested_exit_on_intr(struct vcpu_svm *svm) +static inline bool nested_exit_on_init(struct vcpu_svm *svm) { - return (svm->nested.intercept & 1ULL); + return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_INIT)); +} + +static void nested_svm_init(struct vcpu_svm *svm) +{ + svm->vmcb->control.exit_code = SVM_EXIT_INIT; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; + + nested_svm_vmexit(svm); } -int svm_check_nested_events(struct kvm_vcpu *vcpu) + +static int svm_check_nested_events(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); bool block_nested_events = - kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required; + kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending; + struct kvm_lapic *apic = vcpu->arch.apic; + + if (lapic_in_kernel(vcpu) && + test_bit(KVM_APIC_INIT, &apic->pending_events)) { + if (block_nested_events) + return -EBUSY; + if (!nested_exit_on_init(svm)) + return 0; + nested_svm_init(svm); + return 0; + } + + if (vcpu->arch.exception.pending) { + if (block_nested_events) + return -EBUSY; + if (!nested_exit_on_exception(svm)) + return 0; + nested_svm_inject_exception_vmexit(svm); + return 0; + } + + if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) { + if (block_nested_events) + return -EBUSY; + if (!nested_exit_on_smi(svm)) + return 0; + nested_svm_smi(svm); + return 0; + } + + if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) { + if (block_nested_events) + return -EBUSY; + if (!nested_exit_on_nmi(svm)) + return 0; + nested_svm_nmi(svm); + return 0; + } - if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) { + if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) { if (block_nested_events) return -EBUSY; + if (!nested_exit_on_intr(svm)) + return 0; nested_svm_intr(svm); return 0; } @@ -826,21 +913,170 @@ int nested_svm_exit_special(struct vcpu_svm *svm) switch (exit_code) { case SVM_EXIT_INTR: case SVM_EXIT_NMI: - case SVM_EXIT_EXCP_BASE + MC_VECTOR: - return NESTED_EXIT_HOST; case SVM_EXIT_NPF: - /* For now we are always handling NPFs when using them */ - if (npt_enabled) + return NESTED_EXIT_HOST; + case SVM_EXIT_EXCP_BASE ... 
SVM_EXIT_EXCP_BASE + 0x1f: { + u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); + + if (get_host_vmcb(svm)->control.intercept_exceptions & excp_bits) return NESTED_EXIT_HOST; - break; - case SVM_EXIT_EXCP_BASE + PF_VECTOR: - /* When we're shadowing, trap PFs, but not async PF */ - if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0) + else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR && + svm->vcpu.arch.apf.host_apf_flags) + /* Trap async PF even if not shadowing */ return NESTED_EXIT_HOST; break; + } default: break; } return NESTED_EXIT_CONTINUE; } + +static int svm_get_nested_state(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + u32 user_data_size) +{ + struct vcpu_svm *svm; + struct kvm_nested_state kvm_state = { + .flags = 0, + .format = KVM_STATE_NESTED_FORMAT_SVM, + .size = sizeof(kvm_state), + }; + struct vmcb __user *user_vmcb = (struct vmcb __user *) + &user_kvm_nested_state->data.svm[0]; + + if (!vcpu) + return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE; + + svm = to_svm(vcpu); + + if (user_data_size < kvm_state.size) + goto out; + + /* First fill in the header and copy it out. */ + if (is_guest_mode(vcpu)) { + kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb; + kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE; + kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; + + if (svm->nested.nested_run_pending) + kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING; + } + + if (gif_set(svm)) + kvm_state.flags |= KVM_STATE_NESTED_GIF_SET; + + if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state))) + return -EFAULT; + + if (!is_guest_mode(vcpu)) + goto out; + + /* + * Copy over the full size of the VMCB rather than just the size + * of the structs. + */ + if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE)) + return -EFAULT; + if (copy_to_user(&user_vmcb->control, &svm->nested.ctl, + sizeof(user_vmcb->control))) + return -EFAULT; + if (copy_to_user(&user_vmcb->save, &svm->nested.hsave->save, + sizeof(user_vmcb->save))) + return -EFAULT; + +out: + return kvm_state.size; +} + +static int svm_set_nested_state(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + struct kvm_nested_state *kvm_state) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *hsave = svm->nested.hsave; + struct vmcb __user *user_vmcb = (struct vmcb __user *) + &user_kvm_nested_state->data.svm[0]; + struct vmcb_control_area ctl; + struct vmcb_save_area save; + u32 cr0; + + if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM) + return -EINVAL; + + if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE | + KVM_STATE_NESTED_RUN_PENDING | + KVM_STATE_NESTED_GIF_SET)) + return -EINVAL; + + /* + * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's + * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed. + */ + if (!(vcpu->arch.efer & EFER_SVME)) { + /* GIF=1 and no guest mode are required if SVME=0. */ + if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET) + return -EINVAL; + } + + /* SMM temporarily disables SVM, so we cannot be in guest mode. 
*/ + if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) + return -EINVAL; + + if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) { + svm_leave_nested(svm); + goto out_set_gif; + } + + if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa)) + return -EINVAL; + if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE) + return -EINVAL; + if (copy_from_user(&ctl, &user_vmcb->control, sizeof(ctl))) + return -EFAULT; + if (copy_from_user(&save, &user_vmcb->save, sizeof(save))) + return -EFAULT; + + if (!nested_vmcb_check_controls(&ctl)) + return -EINVAL; + + /* + * Processor state contains L2 state. Check that it is + * valid for guest mode (see nested_vmcb_checks). + */ + cr0 = kvm_read_cr0(vcpu); + if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW)) + return -EINVAL; + + /* + * Validate host state saved from before VMRUN (see + * nested_svm_check_permissions). + * TODO: validate reserved bits for all saved state. + */ + if (!(save.cr0 & X86_CR0_PG)) + return -EINVAL; + + /* + * All checks done, we can enter guest mode. L1 control fields + * come from the nested save state. Guest state is already + * in the registers, the save area of the nested state instead + * contains saved L1 state. + */ + copy_vmcb_control_area(&hsave->control, &svm->vmcb->control); + hsave->save = save; + + svm->nested.vmcb = kvm_state->hdr.svm.vmcb_pa; + load_nested_vmcb_control(svm, &ctl); + nested_prepare_vmcb_control(svm); + +out_set_gif: + svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); + return 0; +} + +struct kvm_x86_nested_ops svm_nested_ops = { + .check_events = svm_check_nested_events, + .get_state = svm_get_nested_state, + .set_state = svm_set_nested_state, +}; diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index ce0b10fe5e2b..035da07500e8 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -215,21 +215,22 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) return pmc; } -static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) +static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; + u32 msr = msr_info->index; /* MSR_PERFCTRn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); if (pmc) { - *data = pmc_read_counter(pmc); + msr_info->data = pmc_read_counter(pmc); return 0; } /* MSR_EVNTSELn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); if (pmc) { - *data = pmc->eventsel; + msr_info->data = pmc->eventsel; return 0; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a862c768fd54..9e333b91ff78 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -33,6 +33,7 @@ #include <asm/debugreg.h> #include <asm/kvm_para.h> #include <asm/irq_remapping.h> +#include <asm/mce.h> #include <asm/spec-ctrl.h> #include <asm/cpu_device_id.h> @@ -264,6 +265,7 @@ static int get_npt_level(struct kvm_vcpu *vcpu) void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { + struct vcpu_svm *svm = to_svm(vcpu); vcpu->arch.efer = efer; if (!npt_enabled) { @@ -274,8 +276,13 @@ void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) efer &= ~EFER_LME; } - to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; - mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); + if (!(efer & EFER_SVME)) { + svm_leave_nested(svm); + svm_set_gif(svm, true); + } + + svm->vmcb->save.efer = efer | EFER_SVME; + mark_dirty(svm->vmcb, VMCB_CR); } static int is_external_interrupt(u32 info) @@ -318,9 +325,6 @@ static int 
skip_emulated_instruction(struct kvm_vcpu *vcpu) if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) return 0; } else { - if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) - pr_err("%s: ip 0x%lx next 0x%llx\n", - __func__, kvm_rip_read(vcpu), svm->next_rip); kvm_rip_write(vcpu, svm->next_rip); } svm_set_interrupt_shadow(vcpu, 0); @@ -333,17 +337,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); unsigned nr = vcpu->arch.exception.nr; bool has_error_code = vcpu->arch.exception.has_error_code; - bool reinject = vcpu->arch.exception.injected; u32 error_code = vcpu->arch.exception.error_code; - /* - * If we are within a nested VM we'd better #VMEXIT and let the guest - * handle the exception - */ - if (!reinject && - nested_svm_check_exception(svm, nr, has_error_code, error_code)) - return; - kvm_deliver_exception_payload(&svm->vcpu); if (nr == BP_VECTOR && !nrips) { @@ -780,7 +775,7 @@ static __init void svm_adjust_mmio_mask(void) */ mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; - kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK); + kvm_mmu_set_mmio_spte_mask(mask, PT_WRITABLE_MASK | PT_USER_MASK); } static void svm_hardware_teardown(void) @@ -890,7 +885,7 @@ static __init int svm_hardware_setup(void) if (npt_enabled && !npt) npt_enabled = false; - kvm_configure_mmu(npt_enabled, PT_PDPE_LEVEL); + kvm_configure_mmu(npt_enabled, PG_LEVEL_1G); pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); if (nrips) { @@ -953,16 +948,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } -static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - if (is_guest_mode(vcpu)) - return svm->nested.hsave->control.tsc_offset; - - return vcpu->arch.tsc_offset; -} - static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1208,6 +1193,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm->avic_is_running = true; svm->nested.hsave = page_address(hsave_page); + clear_page(svm->nested.hsave); svm->msrpm = page_address(msrpm_pages); svm_vcpu_init_msrpm(svm->msrpm); @@ -1364,12 +1350,13 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } -static inline void svm_enable_vintr(struct vcpu_svm *svm) +static void svm_set_vintr(struct vcpu_svm *svm) { struct vmcb_control_area *control; /* The following fields are ignored when AVIC is enabled */ WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu)); + set_intercept(svm, INTERCEPT_VINTR); /* * This is just a dummy VINTR to actually cause a vmexit to happen. @@ -1383,18 +1370,19 @@ static inline void svm_enable_vintr(struct vcpu_svm *svm) mark_dirty(svm->vmcb, VMCB_INTR); } -static void svm_set_vintr(struct vcpu_svm *svm) -{ - set_intercept(svm, INTERCEPT_VINTR); - if (is_intercept(svm, INTERCEPT_VINTR)) - svm_enable_vintr(svm); -} - static void svm_clear_vintr(struct vcpu_svm *svm) { + const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK; clr_intercept(svm, INTERCEPT_VINTR); - svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; + /* Drop int_ctl fields related to VINTR injection. 
*/ + svm->vmcb->control.int_ctl &= mask; + if (is_guest_mode(&svm->vcpu)) { + WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) != + (svm->nested.ctl.int_ctl & V_TPR_MASK)); + svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask; + } + mark_dirty(svm->vmcb, VMCB_INTR); } @@ -1533,14 +1521,6 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) mark_dirty(svm->vmcb, VMCB_DT); } -static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) -{ -} - -static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) -{ -} - static void update_cr0_intercept(struct vcpu_svm *svm) { ulong gcr0 = svm->vcpu.arch.cr0; @@ -1603,7 +1583,7 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) - svm_flush_tlb(vcpu, true); + svm_flush_tlb(vcpu); vcpu->arch.cr4 = cr4; if (!npt_enabled) @@ -1842,6 +1822,25 @@ static bool is_erratum_383(void) return true; } +/* + * Trigger machine check on the host. We assume all the MSRs are already set up + * by the CPU and that we still run on the same CPU as the MCE occurred on. + * We pass a fake environment to the machine check handler because we want + * the guest to be always treated like user space, no matter what context + * it used internally. + */ +static void kvm_machine_check(void) +{ +#if defined(CONFIG_X86_MCE) + struct pt_regs regs = { + .cs = 3, /* Fake ring 3 no matter what the guest ran on */ + .flags = X86_EFLAGS_IF, + }; + + do_machine_check(&regs, 0); +#endif +} + static void svm_handle_mce(struct vcpu_svm *svm) { if (is_erratum_383()) { @@ -1860,11 +1859,7 @@ static void svm_handle_mce(struct vcpu_svm *svm) * On an #MC intercept the MCE handler is not called automatically in * the host. So do it by hand here. */ - asm volatile ( - "int $0x12\n"); - /* not sure if we ever come back to this point */ - - return; + kvm_machine_check(); } static int mc_interception(struct vcpu_svm *svm) @@ -1993,6 +1988,38 @@ static int vmrun_interception(struct vcpu_svm *svm) return nested_svm_vmrun(svm); } +void svm_set_gif(struct vcpu_svm *svm, bool value) +{ + if (value) { + /* + * If VGIF is enabled, the STGI intercept is only added to + * detect the opening of the SMI/NMI window; remove it now. + * Likewise, clear the VINTR intercept, we will set it + * again while processing KVM_REQ_EVENT if needed. + */ + if (vgif_enabled(svm)) + clr_intercept(svm, INTERCEPT_STGI); + if (is_intercept(svm, SVM_EXIT_VINTR)) + svm_clear_vintr(svm); + + enable_gif(svm); + if (svm->vcpu.arch.smi_pending || + svm->vcpu.arch.nmi_pending || + kvm_cpu_has_injectable_intr(&svm->vcpu)) + kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); + } else { + disable_gif(svm); + + /* + * After a CLGI no interrupts should come. But if vGIF is + * in use, we still rely on the VINTR intercept (rather than + * STGI) to detect an open interrupt window. + */ + if (!vgif_enabled(svm)) + svm_clear_vintr(svm); + } +} + static int stgi_interception(struct vcpu_svm *svm) { int ret; @@ -2000,18 +2027,8 @@ static int stgi_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - /* - * If VGIF is enabled, the STGI intercept is only added to - * detect the opening of the SMI/NMI window; remove it now.
- */ - if (vgif_enabled(svm)) - clr_intercept(svm, INTERCEPT_STGI); - ret = kvm_skip_emulated_instruction(&svm->vcpu); - kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); - - enable_gif(svm); - + svm_set_gif(svm, true); return ret; } @@ -2023,13 +2040,7 @@ static int clgi_interception(struct vcpu_svm *svm) return 1; ret = kvm_skip_emulated_instruction(&svm->vcpu); - - disable_gif(svm); - - /* After a CLGI no interrupts should come */ - if (!kvm_vcpu_apicv_active(&svm->vcpu)) - svm_clear_vintr(svm); - + svm_set_gif(svm, false); return ret; } @@ -2193,7 +2204,7 @@ static bool check_selective_cr0_intercepted(struct vcpu_svm *svm, bool ret = false; u64 intercept; - intercept = svm->nested.intercept; + intercept = svm->nested.ctl.intercept; if (!is_guest_mode(&svm->vcpu) || (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) @@ -2671,8 +2682,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm) */ svm_toggle_avic_for_irq_window(&svm->vcpu, true); - svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; - mark_dirty(svm->vmcb, VMCB_INTR); ++svm->vcpu.stat.irq_window_exits; return 1; } @@ -2898,8 +2907,7 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) *info2 = control->exit_info_2; } -static int handle_exit(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion exit_fastpath) +static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_run *kvm_run = vcpu->run; @@ -2912,12 +2920,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, if (npt_enabled) vcpu->arch.cr3 = svm->vmcb->save.cr3; - if (unlikely(svm->nested.exit_required)) { - nested_svm_vmexit(svm); - svm->nested.exit_required = false; - - return 1; - } + svm_complete_interrupts(svm); if (is_guest_mode(vcpu)) { int vmexit; @@ -2938,8 +2941,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, return 1; } - svm_complete_interrupts(svm); - if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason @@ -2957,10 +2958,10 @@ static int handle_exit(struct kvm_vcpu *vcpu, __func__, svm->vmcb->control.exit_int_info, exit_code); - if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { - kvm_skip_emulated_instruction(vcpu); + if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; - } else if (exit_code >= ARRAY_SIZE(svm_exit_handlers) + + if (exit_code >= ARRAY_SIZE(svm_exit_handlers) || !svm_exit_handlers[exit_code]) { vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code); dump_vmcb(vcpu); @@ -3049,18 +3050,37 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) set_cr_intercept(svm, INTERCEPT_CR8_WRITE); } -static int svm_nmi_allowed(struct kvm_vcpu *vcpu) +bool svm_nmi_blocked(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - int ret; - ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && - !(svm->vcpu.arch.hflags & HF_NMI_MASK); - ret = ret && gif_set(svm) && nested_svm_nmi(svm); + bool ret; + + if (!gif_set(svm)) + return true; + + if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm)) + return false; + + ret = (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || + (svm->vcpu.arch.hflags & HF_NMI_MASK); return ret; } +static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) +{ + struct vcpu_svm *svm = to_svm(vcpu); + if (svm->nested.nested_run_pending) + return -EBUSY; + + /* An NMI must not be injected into L2 if it's supposed to VM-Exit. 
*/ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm)) + return -EBUSY; + + return !svm_nmi_blocked(vcpu); +} + static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3081,19 +3101,46 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) } } -static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) +bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - if (!gif_set(svm) || - (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) - return 0; + if (!gif_set(svm)) + return true; - if (is_guest_mode(vcpu) && (svm->vcpu.arch.hflags & HF_VINTR_MASK)) - return !!(svm->vcpu.arch.hflags & HF_HIF_MASK); - else - return !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); + if (is_guest_mode(vcpu)) { + /* As long as interrupts are being delivered... */ + if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) + ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF) + : !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF)) + return true; + + /* ... vmexits aren't blocked by the interrupt shadow */ + if (nested_exit_on_intr(svm)) + return false; + } else { + if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF)) + return true; + } + + return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK); +} + +static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) +{ + struct vcpu_svm *svm = to_svm(vcpu); + if (svm->nested.nested_run_pending) + return -EBUSY; + + /* + * An IRQ must not be injected into L2 if it's supposed to VM-Exit, + * e.g. if the IRQ arrived asynchronously after checking nested events. + */ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm)) + return -EBUSY; + + return !svm_interrupt_blocked(vcpu); } static void enable_irq_window(struct kvm_vcpu *vcpu) @@ -3134,9 +3181,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) return; /* STGI will cause a vm exit */ } - if (svm->nested.exit_required) - return; /* we're not going to run the guest yet */ - /* * Something prevents NMI from been injected. Single step over possible * problem (IRET or exception injection or interrupt shadow) @@ -3156,10 +3200,17 @@ static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) return 0; } -void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) +void svm_flush_tlb(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * Flush only the current ASID even if the TLB flush was invoked via + * kvm_flush_remote_tlbs(). Although flushing remote TLBs requires all + * ASIDs to be flushed, KVM uses a single ASID for L1 and L2, and + * unconditionally does a TLB flush on both nested VM-Enter and nested + * VM-Exit (via kvm_mmu_reset_context()). 
+ */ if (static_cpu_has(X86_FEATURE_FLUSHBYASID)) svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; else @@ -3279,10 +3330,21 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) svm_complete_interrupts(svm); } +static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) +{ + if (!is_guest_mode(vcpu) && + to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && + to_svm(vcpu)->vmcb->control.exit_info_1) + return handle_fastpath_set_msr_irqoff(vcpu); + + return EXIT_FASTPATH_NONE; +} + void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); -static void svm_vcpu_run(struct kvm_vcpu *vcpu) +static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { + fastpath_t exit_fastpath; struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; @@ -3290,13 +3352,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; /* - * A vmexit emulation is required before the vcpu can be executed - * again. - */ - if (unlikely(svm->nested.exit_required)) - return; - - /* * Disable singlestep if we're injecting an interrupt/exception. * We don't want our modified rflags to be pushed on the stack where * we might not be able to easily reset them if we disabled NMI @@ -3387,6 +3442,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) stgi(); /* Any pending NMI will happen here */ + exit_fastpath = svm_exit_handlers_fastpath(vcpu); if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_after_interrupt(&svm->vcpu); @@ -3394,12 +3450,17 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) sync_cr8_to_lapic(vcpu); svm->next_rip = 0; + if (is_guest_mode(&svm->vcpu)) { + sync_nested_vmcb_control(svm); + svm->nested.nested_run_pending = 0; + } svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; /* if exit due to PF check for async PF */ if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) - svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); + svm->vcpu.arch.apf.host_apf_flags = + kvm_read_and_reset_apf_flags(); if (npt_enabled) { vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); @@ -3415,12 +3476,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm_handle_mce(svm); mark_all_clean(svm->vmcb); + return exit_fastpath; } static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); - bool update_guest_cr3 = true; unsigned long cr3; cr3 = __sme_set(root); @@ -3429,18 +3490,13 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root) mark_dirty(svm->vmcb, VMCB_NPT); /* Loading L2's CR3 is handled by enter_svm_guest_mode. */ - if (is_guest_mode(vcpu)) - update_guest_cr3 = false; - else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) - cr3 = vcpu->arch.cr3; - else /* CR3 is already up-to-date. 
*/ - update_guest_cr3 = false; + if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + return; + cr3 = vcpu->arch.cr3; } - if (update_guest_cr3) { - svm->vmcb->save.cr3 = cr3; - mark_dirty(svm->vmcb, VMCB_CR); - } + svm->vmcb->save.cr3 = cr3; + mark_dirty(svm->vmcb, VMCB_CR); } static int is_disabled(void) @@ -3475,7 +3531,7 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_emulated_msr(int index) +static bool svm_has_emulated_msr(u32 index) { switch (index) { case MSR_IA32_MCG_EXT_CTL: @@ -3628,7 +3684,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, info->intercept == x86_intercept_clts) break; - intercept = svm->nested.intercept; + intercept = svm->nested.ctl.intercept; if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) break; @@ -3716,13 +3772,8 @@ out: return ret; } -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion *exit_fastpath) +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) { - if (!is_guest_mode(vcpu) && - to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && - to_svm(vcpu)->vmcb->control.exit_info_1) - *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); } static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) @@ -3737,23 +3788,28 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu) vcpu->arch.mcg_cap &= 0x1ff; } -static int svm_smi_allowed(struct kvm_vcpu *vcpu) +bool svm_smi_blocked(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); /* Per APM Vol.2 15.22.2 "Response to SMI" */ if (!gif_set(svm)) - return 0; + return true; - if (is_guest_mode(&svm->vcpu) && - svm->nested.intercept & (1ULL << INTERCEPT_SMI)) { - /* TODO: Might need to set exit_info_1 and exit_info_2 here */ - svm->vmcb->control.exit_code = SVM_EXIT_SMI; - svm->nested.exit_required = true; - return 0; - } + return is_smm(vcpu); +} - return 1; +static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) +{ + struct vcpu_svm *svm = to_svm(vcpu); + if (svm->nested.nested_run_pending) + return -EBUSY; + + /* An SMI must not be injected into L2 if it's supposed to VM-Exit. */ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm)) + return -EBUSY; + + return !svm_smi_blocked(vcpu); } static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) @@ -3793,12 +3849,13 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL) return 1; nested_vmcb = map.hva; - enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map); + enter_svm_guest_mode(svm, vmcb, nested_vmcb); + kvm_vcpu_unmap(&svm->vcpu, &map, true); } return 0; } -static int enable_smi_window(struct kvm_vcpu *vcpu) +static void enable_smi_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3806,9 +3863,9 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) if (vgif_enabled(svm)) set_intercept(svm, INTERCEPT_STGI); /* STGI will cause a vm exit */ - return 1; + } else { + /* We must be in SMM; RSM will cause a vmexit anyway. */ } - return 0; } static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) @@ -3819,6 +3876,13 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) bool is_user = svm_get_cpl(vcpu) == 3; /* + * If RIP is invalid, go ahead with emulation which will cause an + * internal error exit. + */ + if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT)) + return true; + + /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. 
* * Errata: @@ -3876,9 +3940,9 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) /* * TODO: Last condition latch INIT signals on vCPU when * vCPU is in guest-mode and vmcb12 defines intercept on INIT. - * To properly emulate the INIT intercept, SVM should implement - * kvm_x86_ops.check_nested_events() and call nested_svm_vmexit() - * there if an INIT signal is pending. + * To properly emulate the INIT intercept, + * svm_check_nested_events() should call nested_svm_vmexit() + * if an INIT signal is pending. */ return !gif_set(svm) || (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT)); @@ -3932,8 +3996,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_segment = svm_set_segment, .get_cpl = svm_get_cpl, .get_cs_db_l_bits = kvm_get_cs_db_l_bits, - .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, - .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, .set_cr0 = svm_set_cr0, .set_cr4 = svm_set_cr4, .set_efer = svm_set_efer, @@ -3947,8 +4009,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, - .tlb_flush = svm_flush_tlb, + .tlb_flush_all = svm_flush_tlb, + .tlb_flush_current = svm_flush_tlb, .tlb_flush_gva = svm_flush_tlb_gva, + .tlb_flush_guest = svm_flush_tlb, .run = svm_vcpu_run, .handle_exit = handle_exit, @@ -3989,7 +4053,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .has_wbinvd_exit = svm_has_wbinvd_exit, - .read_l1_tsc_offset = svm_read_l1_tsc_offset, .write_l1_tsc_offset = svm_write_l1_tsc_offset, .load_mmu_pgd = svm_load_mmu_pgd, @@ -4002,6 +4065,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .sched_in = svm_sched_in, .pmu_ops = &amd_pmu_ops, + .nested_ops = &svm_nested_ops, + .deliver_posted_interrupt = svm_deliver_avic_intr, .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, .update_pi_irte = svm_update_pi_irte, @@ -4016,14 +4081,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .mem_enc_reg_region = svm_register_enc_region, .mem_enc_unreg_region = svm_unregister_enc_region, - .nested_enable_evmcs = NULL, - .nested_get_evmcs_version = NULL, - .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, .apic_init_signal_blocked = svm_apic_init_signal_blocked, - - .check_nested_events = svm_check_nested_events, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index df3474f4fb02..6ac4c00a5d82 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -86,25 +86,17 @@ struct nested_state { u64 hsave_msr; u64 vm_cr_msr; u64 vmcb; + u32 host_intercept_exceptions; /* These are the merged vectors */ u32 *msrpm; - /* gpa pointers to the real vectors */ - u64 vmcb_msrpm; - u64 vmcb_iopm; + /* A VMRUN has started but has not yet been performed, so + * we cannot inject a nested vmexit yet. 
*/ + bool nested_run_pending; - /* A VMEXIT is required but not yet emulated */ - bool exit_required; - - /* cache for intercepts of the guest */ - u32 intercept_cr; - u32 intercept_dr; - u32 intercept_exceptions; - u64 intercept; - - /* Nested Paging related state */ - u64 nested_cr3; + /* cache for control fields of the guest */ + struct vmcb_control_area ctl; }; struct vcpu_svm { @@ -360,8 +352,12 @@ u32 svm_msrpm_offset(u32 msr); void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer); void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); -void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); +void svm_flush_tlb(struct kvm_vcpu *vcpu); void disable_nmi_singlestep(struct vcpu_svm *svm); +bool svm_smi_blocked(struct kvm_vcpu *vcpu); +bool svm_nmi_blocked(struct kvm_vcpu *vcpu); +bool svm_interrupt_blocked(struct kvm_vcpu *vcpu); +void svm_set_gif(struct vcpu_svm *svm, bool value); /* nested.c */ @@ -369,28 +365,31 @@ void disable_nmi_singlestep(struct vcpu_svm *svm); #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ #define NESTED_EXIT_CONTINUE 2 /* Further checks needed */ -/* This function returns true if it is save to enable the nmi window */ -static inline bool nested_svm_nmi(struct vcpu_svm *svm) +static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu) { - if (!is_guest_mode(&svm->vcpu)) - return true; + struct vcpu_svm *svm = to_svm(vcpu); - if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) - return true; + return is_guest_mode(vcpu) && (svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK); +} - svm->vmcb->control.exit_code = SVM_EXIT_NMI; - svm->nested.exit_required = true; +static inline bool nested_exit_on_smi(struct vcpu_svm *svm) +{ + return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_SMI)); +} - return false; +static inline bool nested_exit_on_intr(struct vcpu_svm *svm) +{ + return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_INTR)); } -static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu) +static inline bool nested_exit_on_nmi(struct vcpu_svm *svm) { - return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK); + return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_NMI)); } void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, - struct vmcb *nested_vmcb, struct kvm_host_map *map); + struct vmcb *nested_vmcb); +void svm_leave_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct vcpu_svm *svm); void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb); int nested_svm_vmexit(struct vcpu_svm *svm); @@ -398,8 +397,10 @@ int nested_svm_exit_handled(struct vcpu_svm *svm); int nested_svm_check_permissions(struct vcpu_svm *svm); int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); -int svm_check_nested_events(struct kvm_vcpu *vcpu); int nested_svm_exit_special(struct vcpu_svm *svm); +void sync_nested_vmcb_control(struct vcpu_svm *svm); + +extern struct kvm_x86_nested_ops svm_nested_ops; /* avic.c */ diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 249062f24b94..b66432b015d2 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -225,6 +225,14 @@ TRACE_EVENT(kvm_apic, #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 +#define kvm_print_exit_reason(exit_reason, isa) \ + (isa == KVM_ISA_VMX) ? \ + __print_symbolic(exit_reason & 0xffff, VMX_EXIT_REASONS) : \ + __print_symbolic(exit_reason, SVM_EXIT_REASONS), \ + (isa == KVM_ISA_VMX && exit_reason & ~0xffff) ? 
" " : "", \ + (isa == KVM_ISA_VMX) ? \ + __print_flags(exit_reason & ~0xffff, " ", VMX_EXIT_REASON_FLAGS) : "" + /* * Tracepoint for kvm guest exit: */ @@ -250,12 +258,10 @@ TRACE_EVENT(kvm_exit, &__entry->info2); ), - TP_printk("vcpu %u reason %s rip 0x%lx info %llx %llx", + TP_printk("vcpu %u reason %s%s%s rip 0x%lx info %llx %llx", __entry->vcpu_id, - (__entry->isa == KVM_ISA_VMX) ? - __print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS) : - __print_symbolic(__entry->exit_reason, SVM_EXIT_REASONS), - __entry->guest_rip, __entry->info1, __entry->info2) + kvm_print_exit_reason(__entry->exit_reason, __entry->isa), + __entry->guest_rip, __entry->info1, __entry->info2) ); /* @@ -588,12 +594,10 @@ TRACE_EVENT(kvm_nested_vmexit, __entry->exit_int_info_err = exit_int_info_err; __entry->isa = isa; ), - TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " + TP_printk("rip: 0x%016llx reason: %s%s%s ext_inf1: 0x%016llx " "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", __entry->rip, - (__entry->isa == KVM_ISA_VMX) ? - __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) : - __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS), + kvm_print_exit_reason(__entry->exit_code, __entry->isa), __entry->exit_info1, __entry->exit_info2, __entry->exit_int_info, __entry->exit_int_info_err) ); @@ -626,13 +630,11 @@ TRACE_EVENT(kvm_nested_vmexit_inject, __entry->isa = isa; ), - TP_printk("reason: %s ext_inf1: 0x%016llx " + TP_printk("reason: %s%s%s ext_inf1: 0x%016llx " "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", - (__entry->isa == KVM_ISA_VMX) ? - __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) : - __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS), - __entry->exit_info1, __entry->exit_info2, - __entry->exit_int_info, __entry->exit_int_info_err) + kvm_print_exit_reason(__entry->exit_code, __entry->isa), + __entry->exit_info1, __entry->exit_info2, + __entry->exit_int_info, __entry->exit_int_info_err) ); /* @@ -1539,6 +1541,57 @@ TRACE_EVENT(kvm_nested_vmenter_failed, __print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS)) ); +/* + * Tracepoint for syndbg_set_msr. + */ +TRACE_EVENT(kvm_hv_syndbg_set_msr, + TP_PROTO(int vcpu_id, u32 vp_index, u32 msr, u64 data), + TP_ARGS(vcpu_id, vp_index, msr, data), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, vp_index) + __field(u32, msr) + __field(u64, data) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->vp_index = vp_index; + __entry->msr = msr; + __entry->data = data; + ), + + TP_printk("vcpu_id %d vp_index %u msr 0x%x data 0x%llx", + __entry->vcpu_id, __entry->vp_index, __entry->msr, + __entry->data) +); + +/* + * Tracepoint for syndbg_get_msr. 
+ */ +TRACE_EVENT(kvm_hv_syndbg_get_msr, + TP_PROTO(int vcpu_id, u32 vp_index, u32 msr, u64 data), + TP_ARGS(vcpu_id, vp_index, msr, data), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, vp_index) + __field(u32, msr) + __field(u64, data) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->vp_index = vp_index; + __entry->msr = msr; + __entry->data = data; + ), + + TP_printk("vcpu_id %d vp_index %u msr 0x%x data 0x%llx", + __entry->vcpu_id, __entry->vp_index, __entry->msr, + __entry->data) +); #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 8903475f751e..4bbd8b448d22 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -18,6 +18,8 @@ extern int __read_mostly pt_mode; #define PT_MODE_SYSTEM 0 #define PT_MODE_HOST_GUEST 1 +#define PMU_CAP_FW_WRITES (1ULL << 13) + struct nested_vmx_msrs { /* * We only store the "true" versions of the VMX capability MSRs. We @@ -367,4 +369,13 @@ static inline bool vmx_pt_mode_is_host_guest(void) return pt_mode == PT_MODE_HOST_GUEST; } +static inline u64 vmx_get_perf_capabilities(void) +{ + /* + * Since counters are virtualized, KVM would support full + * width counting unconditionally, even if the host lacks it. + */ + return PMU_CAP_FW_WRITES; +} + #endif /* __KVM_X86_VMX_CAPS_H */ diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index 303813423c3e..e5325bd0f304 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -4,6 +4,7 @@ #include <linux/smp.h> #include "../hyperv.h" +#include "../cpuid.h" #include "evmcs.h" #include "vmcs.h" #include "vmx.h" @@ -160,14 +161,6 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr, HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), - EVMCS1_FIELD(CR3_TARGET_VALUE0, cr3_target_value0, - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), - EVMCS1_FIELD(CR3_TARGET_VALUE1, cr3_target_value1, - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), - EVMCS1_FIELD(CR3_TARGET_VALUE2, cr3_target_value2, - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), - EVMCS1_FIELD(CR3_TARGET_VALUE3, cr3_target_value3, - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), /* 32 bit rw */ EVMCS1_FIELD(TPR_THRESHOLD, tpr_threshold, @@ -334,17 +327,18 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa) uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - /* - * vmcs_version represents the range of supported Enlightened VMCS - * versions: lower 8 bits is the minimal version, higher 8 bits is the - * maximum supported version. KVM supports versions from 1 to - * KVM_EVMCS_VERSION. - */ - if (vmx->nested.enlightened_vmcs_enabled) - return (KVM_EVMCS_VERSION << 8) | 1; - - return 0; + struct vcpu_vmx *vmx = to_vmx(vcpu); + /* + * vmcs_version represents the range of supported Enlightened VMCS + * versions: lower 8 bits is the minimal version, higher 8 bits is the + * maximum supported version. KVM supports versions from 1 to + * KVM_EVMCS_VERSION. 
+ */ + if (kvm_cpu_cap_get(X86_FEATURE_VMX) && + vmx->nested.enlightened_vmcs_enabled) + return (KVM_EVMCS_VERSION << 8) | 1; + + return 0; } void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e44f33c82332..9c74a732b08d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -303,11 +303,11 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) cpu = get_cpu(); prev = vmx->loaded_vmcs; vmx->loaded_vmcs = vmcs; - vmx_vcpu_load_vmcs(vcpu, cpu); + vmx_vcpu_load_vmcs(vcpu, cpu, prev); vmx_sync_vmcs_host_state(vmx, prev); put_cpu(); - vmx_segment_cache_clear(vmx); + vmx_register_cache_reset(vcpu); } /* @@ -328,19 +328,19 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 exit_reason; + u32 vm_exit_reason; unsigned long exit_qualification = vcpu->arch.exit_qualification; if (vmx->nested.pml_full) { - exit_reason = EXIT_REASON_PML_FULL; + vm_exit_reason = EXIT_REASON_PML_FULL; vmx->nested.pml_full = false; exit_qualification &= INTR_INFO_UNBLOCK_NMI; } else if (fault->error_code & PFERR_RSVD_MASK) - exit_reason = EXIT_REASON_EPT_MISCONFIG; + vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; else - exit_reason = EXIT_REASON_EPT_VIOLATION; + vm_exit_reason = EXIT_REASON_EPT_VIOLATION; - nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification); + nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification); vmcs12->guest_physical_address = fault->address; } @@ -437,11 +437,6 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, } } -static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa) -{ - return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu)); -} - static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -698,11 +693,6 @@ static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu) VM_EXIT_ACK_INTR_ON_EXIT; } -static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) -{ - return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu)); -} - static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -927,6 +917,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) } return 0; fail: + /* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */ return i + 1; } @@ -1074,34 +1065,81 @@ static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val) } /* + * Returns true if the MMU needs to be sync'd on nested VM-Enter/VM-Exit. + * tl;dr: the MMU needs a sync if L0 is using shadow paging and L1 didn't + * enable VPID for L2 (implying it expects a TLB flush on VMX transitions). + * Here's why. + * + * If EPT is enabled by L0 a sync is never needed: + * - if it is disabled by L1, then L0 is not shadowing L1 or L2 PTEs, there + * cannot be unsync'd SPTEs for either L1 or L2. + * + * - if it is also enabled by L1, then L0 doesn't need to sync on VM-Enter + * VM-Enter as VM-Enter isn't required to invalidate guest-physical mappings + * (irrespective of VPID), i.e. L1 can't rely on the (virtual) CPU to flush + * stale guest-physical mappings for L2 from the TLB. And as above, L0 isn't + * shadowing L1 PTEs so there are no unsync'd SPTEs to sync on VM-Exit. 
+ * + * If EPT is disabled by L0: + * - if VPID is enabled by L1 (for L2), the situation is similar to when L1 + * enables EPT: L0 doesn't need to sync as VM-Enter and VM-Exit aren't + * required to invalidate linear mappings (EPT is disabled so there are + * no combined or guest-physical mappings), i.e. L1 can't rely on the + * (virtual) CPU to flush stale linear mappings for either L2 or itself (L1). + * + * - however if VPID is disabled by L1, then a sync is needed as L1 expects all + * linear mappings (EPT is disabled so there are no combined or guest-physical + * mappings) to be invalidated on both VM-Enter and VM-Exit. + * + * Note, this logic is subtly different than nested_has_guest_tlb_tag(), which + * additionally checks that L2 has been assigned a VPID (when EPT is disabled). + * Whether or not L2 has been assigned a VPID by L0 is irrelevant with respect + * to L1's expectations, e.g. L0 needs to invalidate hardware TLB entries if L2 + * doesn't have a unique VPID to prevent reusing L1's entries (assuming L1 has + * been assigned a VPID), but L0 doesn't need to do a MMU sync because L1 + * doesn't expect stale (virtual) TLB entries to be flushed, i.e. L1 doesn't + * know that L0 will flush the TLB and so L1 will do INVVPID as needed to flush + * stale TLB entries, at which point L0 will sync L2's MMU. + */ +static bool nested_vmx_transition_mmu_sync(struct kvm_vcpu *vcpu) +{ + return !enable_ept && !nested_cpu_has_vpid(get_vmcs12(vcpu)); +} + +/* * Load guest's/host's cr3 at nested entry/exit. @nested_ept is true if we are * emulating VM-Entry into a guest with EPT enabled. On failure, the expected * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to * @entry_failure_code. */ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, - u32 *entry_failure_code) + enum vm_entry_failure_code *entry_failure_code) { - if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { - if (CC(!nested_cr3_valid(vcpu, cr3))) { - *entry_failure_code = ENTRY_FAIL_DEFAULT; - return -EINVAL; - } + if (CC(!nested_cr3_valid(vcpu, cr3))) { + *entry_failure_code = ENTRY_FAIL_DEFAULT; + return -EINVAL; + } - /* - * If PAE paging and EPT are both on, CR3 is not used by the CPU and - * must not be dereferenced. - */ - if (is_pae_paging(vcpu) && !nested_ept) { - if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) { - *entry_failure_code = ENTRY_FAIL_PDPTE; - return -EINVAL; - } + /* + * If PAE paging and EPT are both on, CR3 is not used by the CPU and + * must not be dereferenced. + */ + if (!nested_ept && is_pae_paging(vcpu) && + (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) { + if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) { + *entry_failure_code = ENTRY_FAIL_PDPTE; + return -EINVAL; } } + /* + * Unconditionally skip the TLB flush on fast CR3 switch, all TLB + * flushes are handled by nested_vmx_transition_tlb_flush(). See + * nested_vmx_transition_mmu_sync for details on skipping the MMU sync. 
+ */ if (!nested_ept) - kvm_mmu_new_cr3(vcpu, cr3, false); + kvm_mmu_new_pgd(vcpu, cr3, true, + !nested_vmx_transition_mmu_sync(vcpu)); vcpu->arch.cr3 = cr3; kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); @@ -1132,11 +1170,48 @@ static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu) (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02); } -static u16 nested_get_vpid02(struct kvm_vcpu *vcpu) +static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12, + bool is_vmenter) { struct vcpu_vmx *vmx = to_vmx(vcpu); - return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid; + /* + * If VPID is disabled, linear and combined mappings are flushed on + * VM-Enter/VM-Exit, and guest-physical mappings are valid only for + * their associated EPTP. + */ + if (!enable_vpid) + return; + + /* + * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings + * for *all* contexts to be flushed on VM-Enter/VM-Exit. + * + * If VPID is enabled and used by vmc12, but L2 does not have a unique + * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate + * a VPID for L2, flush the current context as the effective ASID is + * common to both L1 and L2. + * + * Defer the flush so that it runs after vmcs02.EPTP has been set by + * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid + * redundant flushes further down the nested pipeline. + * + * If a TLB flush isn't required due to any of the above, and vpid12 is + * changing then the new "virtual" VPID (vpid12) will reuse the same + * "real" VPID (vpid02), and so needs to be sync'd. There is no direct + * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for + * all nested vCPUs. + */ + if (!nested_cpu_has_vpid(vmcs12)) { + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + } else if (!nested_has_guest_tlb_tag(vcpu)) { + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + } else if (is_vmenter && + vmcs12->virtual_processor_id != vmx->nested.last_vpid) { + vmx->nested.last_vpid = vmcs12->virtual_processor_id; + vpid_sync_context(nested_get_vpid02(vcpu)); + } } static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) @@ -1700,10 +1775,6 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx) * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr; * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr; * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr; - * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0; - * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1; - * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2; - * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3; * vmcs12->page_fault_error_code_mask = * evmcs->page_fault_error_code_mask; * vmcs12->page_fault_error_code_match = @@ -1777,10 +1848,6 @@ static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx) * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr; * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr; * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr; - * evmcs->cr3_target_value0 = vmcs12->cr3_target_value0; - * evmcs->cr3_target_value1 = vmcs12->cr3_target_value1; - * evmcs->cr3_target_value2 = vmcs12->cr3_target_value2; - * evmcs->cr3_target_value3 = vmcs12->cr3_target_value3; * evmcs->tpr_threshold = vmcs12->tpr_threshold; * evmcs->virtual_processor_id = vmcs12->virtual_processor_id; * evmcs->exception_bitmap = vmcs12->exception_bitmap; @@ -2020,9 +2087,25 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct 
hrtimer *timer) return HRTIMER_NORESTART; } -static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) +static u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >> + VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; + + if (!vmx->nested.has_preemption_timer_deadline) { + vmx->nested.preemption_timer_deadline = + vmcs12->vmx_preemption_timer_value + l1_scaled_tsc; + vmx->nested.has_preemption_timer_deadline = true; + } + return vmx->nested.preemption_timer_deadline - l1_scaled_tsc; +} + +static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu, + u64 preemption_timeout) { - u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; struct vcpu_vmx *vmx = to_vmx(vcpu); /* @@ -2041,7 +2124,8 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) preemption_timeout *= 1000000; do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz); hrtimer_start(&vmx->nested.preemption_timer, - ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL); + ktime_add_ns(ktime_get(), preemption_timeout), + HRTIMER_MODE_ABS_PINNED); } static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) @@ -2398,7 +2482,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * is assigned to entry_failure_code on failure. */ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - u32 *entry_failure_code) + enum vm_entry_failure_code *entry_failure_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; @@ -2447,32 +2531,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); - if (enable_vpid) { - /* - * There is no direct mapping between vpid02 and vpid12, the - * vpid02 is per-vCPU for L0 and reused while the value of - * vpid12 is changed w/ one invvpid during nested vmentry. - * The vpid12 is allocated by L1 for L2, so it will not - * influence global bitmap(for vpid01 and vpid02 allocation) - * even if spawn a lot of nested vCPUs. - */ - if (nested_cpu_has_vpid(vmcs12) && nested_has_guest_tlb_tag(vcpu)) { - if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) { - vmx->nested.last_vpid = vmcs12->virtual_processor_id; - __vmx_flush_tlb(vcpu, nested_get_vpid02(vcpu), false); - } - } else { - /* - * If L1 use EPT, then L0 needs to execute INVEPT on - * EPTP02 instead of EPTP01. Therefore, delay TLB - * flush until vmcs02->eptp is fully updated by - * KVM_REQ_LOAD_MMU_PGD. Note that this assumes - * KVM_REQ_TLB_FLUSH is evaluated after - * KVM_REQ_LOAD_MMU_PGD in vcpu_enter_guest(). 
- */ - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } - } + nested_vmx_transition_tlb_flush(vcpu, vmcs12, true); if (nested_cpu_has_ept(vmcs12)) nested_ept_init_mmu_context(vcpu); @@ -2883,11 +2942,11 @@ static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - u32 *exit_qual) + enum vm_entry_failure_code *entry_failure_code) { bool ia32e; - *exit_qual = ENTRY_FAIL_DEFAULT; + *entry_failure_code = ENTRY_FAIL_DEFAULT; if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) || CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))) @@ -2902,7 +2961,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, return -EINVAL; if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { - *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; + *entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR; return -EINVAL; } @@ -3194,9 +3253,12 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + enum vm_entry_failure_code entry_failure_code; bool evaluate_pending_interrupts; - u32 exit_reason = EXIT_REASON_INVALID_STATE; - u32 exit_qual; + u32 exit_reason, failed_index; + + if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) + kvm_vcpu_flush_tlb_current(vcpu); evaluate_pending_interrupts = exec_controls_get(vmx) & (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING); @@ -3241,24 +3303,33 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, return NVMX_VMENTRY_VMFAIL; } - if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) + if (nested_vmx_check_guest_state(vcpu, vmcs12, + &entry_failure_code)) { + exit_reason = EXIT_REASON_INVALID_STATE; + vmcs12->exit_qualification = entry_failure_code; goto vmentry_fail_vmexit; + } } enter_guest_mode(vcpu); if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) vcpu->arch.tsc_offset += vmcs12->tsc_offset; - if (prepare_vmcs02(vcpu, vmcs12, &exit_qual)) + if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) { + exit_reason = EXIT_REASON_INVALID_STATE; + vmcs12->exit_qualification = entry_failure_code; goto vmentry_fail_vmexit_guest_mode; + } if (from_vmentry) { - exit_reason = EXIT_REASON_MSR_LOAD_FAIL; - exit_qual = nested_vmx_load_msr(vcpu, - vmcs12->vm_entry_msr_load_addr, - vmcs12->vm_entry_msr_load_count); - if (exit_qual) + failed_index = nested_vmx_load_msr(vcpu, + vmcs12->vm_entry_msr_load_addr, + vmcs12->vm_entry_msr_load_count); + if (failed_index) { + exit_reason = EXIT_REASON_MSR_LOAD_FAIL; + vmcs12->exit_qualification = failed_index; goto vmentry_fail_vmexit_guest_mode; + } } else { /* * The MMU is not initialized to point at the right entities yet and @@ -3293,8 +3364,10 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, * the timer. */ vmx->nested.preemption_timer_expired = false; - if (nested_cpu_has_preemption_timer(vmcs12)) - vmx_start_preemption_timer(vcpu); + if (nested_cpu_has_preemption_timer(vmcs12)) { + u64 timer_value = vmx_calc_preemption_timer_value(vcpu); + vmx_start_preemption_timer(vcpu, timer_value); + } /* * Note no nested_vmx_succeed or nested_vmx_fail here. 
At this point @@ -3322,7 +3395,6 @@ vmentry_fail_vmexit: load_vmcs12_host_state(vcpu, vmcs12); vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY; - vmcs12->exit_qualification = exit_qual; if (enable_shadow_vmcs || vmx->nested.hv_evmcs) vmx->nested.need_vmcs12_to_shadow_sync = true; return NVMX_VMENTRY_VMEXIT; @@ -3403,6 +3475,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * the nested entry. */ vmx->nested.nested_run_pending = 1; + vmx->nested.has_preemption_timer_deadline = false; status = nested_vmx_enter_non_root_mode(vcpu, true); if (unlikely(status != NVMX_VMENTRY_SUCCESS)) goto vmentry_failed; @@ -3632,6 +3705,12 @@ static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu) vcpu->arch.exception.payload); } +static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu) +{ + return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && + to_vmx(vcpu)->nested.preemption_timer_expired; +} + static int vmx_check_nested_events(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3661,11 +3740,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) /* * Process any exceptions that are not debug traps before MTF. */ - if (vcpu->arch.exception.pending && - !vmx_pending_dbg_trap(vcpu) && - nested_vmx_check_exception(vcpu, &exit_qual)) { + if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) { if (block_nested_events) return -EBUSY; + if (!nested_vmx_check_exception(vcpu, &exit_qual)) + goto no_vmexit; nested_vmx_inject_exception_vmexit(vcpu, exit_qual); return 0; } @@ -3678,25 +3757,34 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) return 0; } - if (vcpu->arch.exception.pending && - nested_vmx_check_exception(vcpu, &exit_qual)) { + if (vcpu->arch.exception.pending) { if (block_nested_events) return -EBUSY; + if (!nested_vmx_check_exception(vcpu, &exit_qual)) + goto no_vmexit; nested_vmx_inject_exception_vmexit(vcpu, exit_qual); return 0; } - if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && - vmx->nested.preemption_timer_expired) { + if (nested_vmx_preemption_timer_pending(vcpu)) { if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); return 0; } - if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { + if (vcpu->arch.smi_pending && !is_smm(vcpu)) { + if (block_nested_events) + return -EBUSY; + goto no_vmexit; + } + + if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) { if (block_nested_events) return -EBUSY; + if (!nested_exit_on_nmi(vcpu)) + goto no_vmexit; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, NMI_VECTOR | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK, 0); @@ -3709,13 +3797,16 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) return 0; } - if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(vcpu)) { + if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) { if (block_nested_events) return -EBUSY; + if (!nested_exit_on_intr(vcpu)) + goto no_vmexit; nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); return 0; } +no_vmexit: vmx_complete_nested_posted_interrupt(vcpu); return 0; } @@ -3842,12 +3933,12 @@ static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu, cpu = get_cpu(); vmx->loaded_vmcs = &vmx->nested.vmcs02; - vmx_vcpu_load(&vmx->vcpu, cpu); + vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01); sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); vmx->loaded_vmcs = &vmx->vmcs01; - vmx_vcpu_load(&vmx->vcpu, cpu); + vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02); put_cpu(); } @@ -3876,10 
+3967,6 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); - vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); - vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); - vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); - vmcs12->guest_interruptibility_info = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); @@ -3889,9 +3976,10 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; if (nested_cpu_has_preemption_timer(vmcs12) && - vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) - vmcs12->vmx_preemption_timer_value = - vmx_get_preemption_timer_value(vcpu); + vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER && + !vmx->nested.nested_run_pending) + vmcs12->vmx_preemption_timer_value = + vmx_get_preemption_timer_value(vcpu); /* * In some cases (usually, nested EPT), L2 is allowed to change its @@ -3939,11 +4027,11 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * which already writes to vmcs12 directly. */ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - u32 exit_reason, u32 exit_intr_info, + u32 vm_exit_reason, u32 exit_intr_info, unsigned long exit_qualification) { /* update exit information fields: */ - vmcs12->vm_exit_reason = exit_reason; + vmcs12->vm_exit_reason = vm_exit_reason; vmcs12->exit_qualification = exit_qualification; vmcs12->vm_exit_intr_info = exit_intr_info; @@ -3998,8 +4086,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { + enum vm_entry_failure_code ignored; struct kvm_segment seg; - u32 entry_failure_code; if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; @@ -4034,30 +4122,13 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * Only PDPTE load can fail as the value of cr3 was checked on entry and * couldn't have changed. */ - if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) + if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &ignored)) nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; - /* - * If vmcs01 doesn't use VPID, CPU flushes TLB on every - * VMEntry/VMExit. Thus, no need to flush TLB. - * - * If vmcs12 doesn't use VPID, L1 expects TLB to be - * flushed on every VMEntry/VMExit. - * - * Otherwise, we can preserve TLB entries as long as we are - * able to tag L1 TLB entries differently than L2 TLB entries. - * - * If vmcs12 uses EPT, we need to execute this flush on EPTP01 - * and therefore we request the TLB flush to happen only after VMCS EPTP - * has been set by KVM_REQ_LOAD_MMU_PGD. - */ - if (enable_vpid && - (!nested_cpu_has_vpid(vmcs12) || !nested_has_guest_tlb_tag(vcpu))) { - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } + nested_vmx_transition_tlb_flush(vcpu, vmcs12, false); vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs); vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp); @@ -4204,7 +4275,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) * VMFail, like everything else we just need to ensure our * software model is up-to-date. 
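With vmx_calc_preemption_timer_value() above, the preemption timer tracks an absolute deadline in L1's scaled-TSC domain, so a re-entry (or a migration that restored the deadline) only arms the ticks that are still left instead of restarting from the vmcs12 value. A minimal standalone sketch of that bookkeeping, with an assumed scaling shift and invented type names:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define PREEMPTION_TIMER_RATE 5	/* assumed TSC >> 5 scaling for the sketch */

struct timer_state {
	bool     has_deadline;
	uint64_t deadline;	/* in scaled L1 TSC units */
};

/* First call latches an absolute deadline; later calls return what is left. */
static uint64_t calc_timer_value(struct timer_state *t, uint64_t l1_tsc,
				 uint32_t vmcs12_timer_value)
{
	uint64_t now = l1_tsc >> PREEMPTION_TIMER_RATE;

	if (!t->has_deadline) {
		t->deadline = now + vmcs12_timer_value;
		t->has_deadline = true;
	}
	return t->deadline - now;
}

int main(void)
{
	struct timer_state t = { false, 0 };

	/* initial VM-Enter: full value */
	printf("%llu\n", (unsigned long long)calc_timer_value(&t, 1 << 10, 100));
	/* re-enter 64 scaled ticks later: 100 - 64 = 36 left */
	printf("%llu\n", (unsigned long long)
	       calc_timer_value(&t, (1 << 10) + (64 << 5), 100));
	return 0;
}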
*/ - if (enable_ept) + if (enable_ept && is_pae_paging(vcpu)) ept_save_pdptrs(vcpu); kvm_mmu_reset_context(vcpu); @@ -4272,7 +4343,7 @@ vmabort: * and modify vmcs12 to make it see what it would expect to see there if * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) */ -void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, +void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, u32 exit_intr_info, unsigned long exit_qualification) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4281,6 +4352,10 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, /* trying to cancel vmlaunch/vmresume is a bug */ WARN_ON_ONCE(vmx->nested.nested_run_pending); + /* Service the TLB flush request for L2 before switching to L1. */ + if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) + kvm_vcpu_flush_tlb_current(vcpu); + leave_guest_mode(vcpu); if (nested_cpu_has_preemption_timer(vmcs12)) @@ -4292,9 +4367,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, if (likely(!vmx->fail)) { sync_vmcs02_to_vmcs12(vcpu, vmcs12); - if (exit_reason != -1) - prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, - exit_qualification); + if (vm_exit_reason != -1) + prepare_vmcs12(vcpu, vmcs12, vm_exit_reason, + exit_intr_info, exit_qualification); /* * Must happen outside of sync_vmcs02_to_vmcs12() as it will @@ -4344,20 +4419,20 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); vmx->nested.pi_desc = NULL; - /* - * We are now running in L2, mmu_notifier will force to reload the - * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1. - */ - kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); + if (vmx->nested.reload_vmcs01_apic_access_page) { + vmx->nested.reload_vmcs01_apic_access_page = false; + kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); + } - if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs)) + if ((vm_exit_reason != -1) && + (enable_shadow_vmcs || vmx->nested.hv_evmcs)) vmx->nested.need_vmcs12_to_shadow_sync = true; /* in case we halted in L2 */ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; if (likely(!vmx->fail)) { - if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && + if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && nested_exit_intr_ack_set(vcpu)) { int irq = kvm_cpu_get_interrupt(vcpu); WARN_ON(irq < 0); @@ -4365,7 +4440,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR; } - if (exit_reason != -1) + if (vm_exit_reason != -1) trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, vmcs12->exit_qualification, vmcs12->idt_vectoring_info_field, @@ -4554,13 +4629,13 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) gva_t gva; struct x86_exception e; - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), vmcs_read32(VMX_INSTRUCTION_INFO), false, sizeof(*vmpointer), &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } @@ -4614,7 +4689,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) goto out_shadow_vmcs; hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_PINNED); + HRTIMER_MODE_ABS_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; vmx->nested.vpid02 = allocate_vpid(); @@ -4819,7 +4894,7 @@ 
static int handle_vmread(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) : get_vmcs12(vcpu); - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); struct vcpu_vmx *vmx = to_vmx(vcpu); struct x86_exception e; @@ -4869,7 +4944,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) return 1; /* _system ok, nested_vmx_check_permission has verified cpl=0 */ if (kvm_write_guest_virt_system(vcpu, gva, &value, len, &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } } @@ -4905,7 +4980,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) : get_vmcs12(vcpu); - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); struct vcpu_vmx *vmx = to_vmx(vcpu); struct x86_exception e; @@ -4943,7 +5018,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) instr_info, false, len, &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, &value, len, &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } } @@ -5090,7 +5165,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) /* Emulate the VMPTRST instruction */ static int handle_vmptrst(struct kvm_vcpu *vcpu) { - unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qual = vmx_get_exit_qual(vcpu); u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr; struct x86_exception e; @@ -5108,23 +5183,33 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ if (kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, sizeof(gpa_t), &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } return nested_vmx_succeed(vcpu); } +#define EPTP_PA_MASK GENMASK_ULL(51, 12) + +static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) +{ + return VALID_PAGE(root_hpa) && + ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK)); +} + /* Emulate the INVEPT instruction */ static int handle_invept(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 vmx_instruction_info, types; - unsigned long type; + unsigned long type, roots_to_free; + struct kvm_mmu *mmu; gva_t gva; struct x86_exception e; struct { u64 eptp, gpa; } operand; + int i; if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || @@ -5148,27 +5233,49 @@ static int handle_invept(struct kvm_vcpu *vcpu) /* According to the Intel VMX instruction reference, the memory * operand is read even if it isn't needed (e.g., for type==global) */ - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), vmx_instruction_info, false, sizeof(operand), &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } - switch (type) { - case VMX_EPT_EXTENT_GLOBAL: - case VMX_EPT_EXTENT_CONTEXT: /* - * TODO: Sync the necessary shadow EPT roots here, rather than - * at the next emulated VM-entry. + * Nested EPT roots are always held through guest_mmu, + * not root_mmu. 
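The new EPTP_PA_MASK / nested_ept_root_matches() pair compares only the 4KiB-aligned address bits of an EPT pointer and ignores the attribute bits (memory type, page-walk length, A/D enable) in the low byte. A self-contained sketch of the same comparison; the constants are written out locally for the example rather than taken from kernel headers:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define EPTP_PA_MASK	(((1ULL << 52) - 1) & ~0xfffULL)	/* bits 51:12 */

static bool ept_root_matches(uint64_t cached_eptp, uint64_t invept_eptp)
{
	/* Attribute bits (memory type, walk length, A/D) live in 11:0. */
	return (cached_eptp & EPTP_PA_MASK) == (invept_eptp & EPTP_PA_MASK);
}

int main(void)
{
	uint64_t root = 0x12345000ULL | 0x6 | (3 << 3) | (1 << 6); /* WB, 4-level, A/D */
	uint64_t op   = 0x12345000ULL | 0x6 | (3 << 3);            /* same root, no A/D */

	printf("%d\n", ept_root_matches(root, op));          /* 1: same root page  */
	printf("%d\n", ept_root_matches(root, op + 0x1000)); /* 0: different root  */
	return 0;
}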
*/ + mmu = &vcpu->arch.guest_mmu; + + switch (type) { + case VMX_EPT_EXTENT_CONTEXT: + if (!nested_vmx_check_eptp(vcpu, operand.eptp)) + return nested_vmx_failValid(vcpu, + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + + roots_to_free = 0; + if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd, + operand.eptp)) + roots_to_free |= KVM_MMU_ROOT_CURRENT; + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { + if (nested_ept_root_matches(mmu->prev_roots[i].hpa, + mmu->prev_roots[i].pgd, + operand.eptp)) + roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); + } + break; + case VMX_EPT_EXTENT_GLOBAL: + roots_to_free = KVM_MMU_ROOTS_ALL; break; default: BUG(); break; } + if (roots_to_free) + kvm_mmu_free_roots(vcpu, mmu, roots_to_free); + return nested_vmx_succeed(vcpu); } @@ -5208,11 +5315,11 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) /* according to the intel vmx instruction reference, the memory * operand is read even if it isn't needed (e.g., for type==global) */ - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), vmx_instruction_info, false, sizeof(operand), &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } if (operand.vpid >> 16) @@ -5226,27 +5333,37 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) is_noncanonical_address(operand.gla, vcpu)) return nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); - if (cpu_has_vmx_invvpid_individual_addr()) { - __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, - vpid02, operand.gla); - } else - __vmx_flush_tlb(vcpu, vpid02, false); + vpid_sync_vcpu_addr(vpid02, operand.gla); break; case VMX_VPID_EXTENT_SINGLE_CONTEXT: case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: if (!operand.vpid) return nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); - __vmx_flush_tlb(vcpu, vpid02, false); + vpid_sync_context(vpid02); break; case VMX_VPID_EXTENT_ALL_CONTEXT: - __vmx_flush_tlb(vcpu, vpid02, false); + vpid_sync_context(vpid02); break; default: WARN_ON_ONCE(1); return kvm_skip_emulated_instruction(vcpu); } + /* + * Sync the shadow page tables if EPT is disabled, L1 is invalidating + * linear mappings for L2 (tagged with L2's VPID). Free all roots as + * VPIDs are not tracked in the MMU role. + * + * Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share + * an MMU when EPT is disabled. + * + * TODO: sync only the affected SPTEs for INVDIVIDUAL_ADDR. 
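handle_invvpid() above now funnels every INVVPID extent through vpid_sync_vcpu_addr()/vpid_sync_context() on the single host-side vpid02, since L1's vpid12 values never reach hardware. A toy dispatcher showing that collapse; the capability flag and flush primitives below are stand-ins for the sketch, not kernel APIs:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

enum invvpid_extent { ADDR, SINGLE, ALL, SINGLE_NON_GLOBAL };

static bool has_individual_addr_invvpid;	/* assumed host capability */

static void flush_vpid_addr(uint16_t vpid, uint64_t gla)
{
	if (has_individual_addr_invvpid)
		printf("invvpid addr   vpid=%u gla=%#llx\n",
		       (unsigned)vpid, (unsigned long long)gla);
	else
		printf("invvpid single vpid=%u (addr fallback)\n", (unsigned)vpid);
}

static void flush_vpid_single(uint16_t vpid)
{
	printf("invvpid single vpid=%u\n", (unsigned)vpid);
}

/* Every extent L1 requests for vpid12 lands on the one real tag, vpid02. */
static void emulate_invvpid(enum invvpid_extent ext, uint16_t vpid02, uint64_t gla)
{
	switch (ext) {
	case ADDR:
		flush_vpid_addr(vpid02, gla);
		break;
	case SINGLE:
	case SINGLE_NON_GLOBAL:
	case ALL:
		flush_vpid_single(vpid02);
		break;
	}
}

int main(void)
{
	emulate_invvpid(ADDR, 7, 0xffff800000001000ULL);
	emulate_invvpid(ALL, 7, 0);
	return 0;
}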
+ */ + if (!enable_ept) + kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu, + KVM_MMU_ROOTS_ALL); + return nested_vmx_succeed(vcpu); } @@ -5327,8 +5444,8 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu) fail: nested_vmx_vmexit(vcpu, vmx->exit_reason, - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_readl(EXIT_QUALIFICATION)); + vmx_get_intr_info(vcpu), + vmx_get_exit_qual(vcpu)); return 1; } @@ -5379,7 +5496,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; @@ -5433,7 +5550,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); int cr = exit_qualification & 15; int reg; unsigned long val; @@ -5449,15 +5566,6 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, return true; break; case 3: - if ((vmcs12->cr3_target_count >= 1 && - vmcs12->cr3_target_value0 == val) || - (vmcs12->cr3_target_count >= 2 && - vmcs12->cr3_target_value1 == val) || - (vmcs12->cr3_target_count >= 3 && - vmcs12->cr3_target_value2 == val) || - (vmcs12->cr3_target_count >= 4 && - vmcs12->cr3_target_value3 == val)) - return false; if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING)) return true; break; @@ -5551,49 +5659,85 @@ static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12) } /* - * Return true if we should exit from L2 to L1 to handle an exit, or false if we - * should handle it ourselves in L0 (and then continue L2). Only call this - * when in is_guest_mode (L2). + * Return true if L0 wants to handle an exit from L2 regardless of whether or not + * L1 wants the exit. Only call this when in is_guest_mode (L2). */ -bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) +static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason) { - u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - - WARN_ON_ONCE(vmx->nested.nested_run_pending); - - if (unlikely(vmx->fail)) { - trace_kvm_nested_vmenter_failed( - "hardware VM-instruction error: ", - vmcs_read32(VM_INSTRUCTION_ERROR)); - return true; - } - - trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, - vmcs_readl(EXIT_QUALIFICATION), - vmx->idt_vectoring_info, - intr_info, - vmcs_read32(VM_EXIT_INTR_ERROR_CODE), - KVM_ISA_VMX); + u32 intr_info; switch (exit_reason) { case EXIT_REASON_EXCEPTION_NMI: + intr_info = vmx_get_intr_info(vcpu); if (is_nmi(intr_info)) - return false; + return true; else if (is_page_fault(intr_info)) - return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept; + return vcpu->arch.apf.host_apf_flags || !enable_ept; else if (is_debug(intr_info) && vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - return false; + return true; else if (is_breakpoint(intr_info) && vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) - return false; + return true; + return false; + case EXIT_REASON_EXTERNAL_INTERRUPT: + return true; + case EXIT_REASON_MCE_DURING_VMENTRY: + return true; + case EXIT_REASON_EPT_VIOLATION: + /* + * L0 always deals with the EPT violation. 
If nested EPT is + * used, and the nested mmu code discovers that the address is + * missing in the guest EPT table (EPT12), the EPT violation + * will be injected with nested_ept_inject_page_fault() + */ + return true; + case EXIT_REASON_EPT_MISCONFIG: + /* + * L2 never uses directly L1's EPT, but rather L0's own EPT + * table (shadow on EPT) or a merged EPT table that L0 built + * (EPT on EPT). So any problems with the structure of the + * table is L0's fault. + */ + return true; + case EXIT_REASON_PREEMPTION_TIMER: + return true; + case EXIT_REASON_PML_FULL: + /* We emulate PML support to L1. */ + return true; + case EXIT_REASON_VMFUNC: + /* VM functions are emulated through L2->L0 vmexits. */ + return true; + case EXIT_REASON_ENCLS: + /* SGX is never exposed to L1 */ + return true; + default: + break; + } + return false; +} + +/* + * Return 1 if L1 wants to intercept an exit from L2. Only call this when in + * is_guest_mode (L2). + */ +static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + u32 intr_info; + + switch (exit_reason) { + case EXIT_REASON_EXCEPTION_NMI: + intr_info = vmx_get_intr_info(vcpu); + if (is_nmi(intr_info)) + return true; + else if (is_page_fault(intr_info)) + return true; return vmcs12->exception_bitmap & (1u << (intr_info & INTR_INFO_VECTOR_MASK)); case EXIT_REASON_EXTERNAL_INTERRUPT: - return false; + return nested_exit_on_intr(vcpu); case EXIT_REASON_TRIPLE_FAULT: return true; case EXIT_REASON_INTERRUPT_WINDOW: @@ -5658,7 +5802,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) nested_cpu_has2(vmcs12, SECONDARY_EXEC_PAUSE_LOOP_EXITING); case EXIT_REASON_MCE_DURING_VMENTRY: - return false; + return true; case EXIT_REASON_TPR_BELOW_THRESHOLD: return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); case EXIT_REASON_APIC_ACCESS: @@ -5670,22 +5814,6 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) * delivery" only come from vmcs12. */ return true; - case EXIT_REASON_EPT_VIOLATION: - /* - * L0 always deals with the EPT violation. If nested EPT is - * used, and the nested mmu code discovers that the address is - * missing in the guest EPT table (EPT12), the EPT violation - * will be injected with nested_ept_inject_page_fault() - */ - return false; - case EXIT_REASON_EPT_MISCONFIG: - /* - * L2 never uses directly L1's EPT, but rather L0's own EPT - * table (shadow on EPT) or a merged EPT table that L0 built - * (EPT on EPT). So any problems with the structure of the - * table is L0's fault. - */ - return false; case EXIT_REASON_INVPCID: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) && @@ -5702,17 +5830,6 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) * the XSS exit bitmap in vmcs12. */ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); - case EXIT_REASON_PREEMPTION_TIMER: - return false; - case EXIT_REASON_PML_FULL: - /* We emulate PML support to L1. */ - return false; - case EXIT_REASON_VMFUNC: - /* VM functions are emulated through L2->L0 vmexits. */ - return false; - case EXIT_REASON_ENCLS: - /* SGX is never exposed to L1 */ - return false; case EXIT_REASON_UMWAIT: case EXIT_REASON_TPAUSE: return nested_cpu_has2(vmcs12, @@ -5722,6 +5839,67 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) } } +/* + * Conditionally reflect a VM-Exit into L1. Returns %true if the VM-Exit was + * reflected into L1. 
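The old nested_vmx_exit_reflected() is being split into nested_vmx_l0_wants_exit() and nested_vmx_l1_wants_exit(), and the caller reflects an exit only when L0 does not want it and L1 does. A toy model of that control flow, with both predicate results precomputed into a hypothetical struct:

#include <stdbool.h>
#include <stdio.h>

struct vmexit {
	int  reason;
	bool l0_wants;	/* e.g. EPT violations, preemption timer, PML full */
	bool l1_wants;	/* e.g. exception bit set in vmcs12, intr exiting  */
};

/* Returns true if the exit is forwarded (reflected) to L1. */
static bool reflect_vmexit(const struct vmexit *e)
{
	if (e->l0_wants)	/* L0's needs trump L1's desires */
		return false;
	if (!e->l1_wants)	/* L1 didn't ask for it either   */
		return false;
	/* ...fill vmcs12 exit fields and switch to L1 here... */
	return true;
}

int main(void)
{
	struct vmexit ept   = { 48, true,  true };	/* EPT violation: L0 handles  */
	struct vmexit cpuid = { 10, false, true };	/* CPUID: reflected to L1     */

	printf("%d %d\n", reflect_vmexit(&ept), reflect_vmexit(&cpuid)); /* 0 1 */
	return 0;
}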
+ */ +bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 exit_reason = vmx->exit_reason; + unsigned long exit_qual; + u32 exit_intr_info; + + WARN_ON_ONCE(vmx->nested.nested_run_pending); + + /* + * Late nested VM-Fail shares the same flow as nested VM-Exit since KVM + * has already loaded L2's state. + */ + if (unlikely(vmx->fail)) { + trace_kvm_nested_vmenter_failed( + "hardware VM-instruction error: ", + vmcs_read32(VM_INSTRUCTION_ERROR)); + exit_intr_info = 0; + exit_qual = 0; + goto reflect_vmexit; + } + + exit_intr_info = vmx_get_intr_info(vcpu); + exit_qual = vmx_get_exit_qual(vcpu); + + trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, exit_qual, + vmx->idt_vectoring_info, exit_intr_info, + vmcs_read32(VM_EXIT_INTR_ERROR_CODE), + KVM_ISA_VMX); + + /* If L0 (KVM) wants the exit, it trumps L1's desires. */ + if (nested_vmx_l0_wants_exit(vcpu, exit_reason)) + return false; + + /* If L1 doesn't want the exit, handle it in L0. */ + if (!nested_vmx_l1_wants_exit(vcpu, exit_reason)) + return false; + + /* + * vmcs.VM_EXIT_INTR_INFO is only valid for EXCEPTION_NMI exits. For + * EXTERNAL_INTERRUPT, the value for vmcs12->vm_exit_intr_info would + * need to be synthesized by querying the in-kernel LAPIC, but external + * interrupts are never reflected to L1 so it's a non-issue. + */ + if ((exit_intr_info & + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + vmcs12->vm_exit_intr_error_code = + vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + } + +reflect_vmexit: + nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, exit_qual); + return true; +} static int vmx_get_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, @@ -5733,8 +5911,10 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, .flags = 0, .format = KVM_STATE_NESTED_FORMAT_VMX, .size = sizeof(kvm_state), + .hdr.vmx.flags = 0, .hdr.vmx.vmxon_pa = -1ull, .hdr.vmx.vmcs12_pa = -1ull, + .hdr.vmx.preemption_timer_deadline = 0, }; struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = &user_kvm_nested_state->data.vmx[0]; @@ -5776,6 +5956,14 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (vmx->nested.mtf_pending) kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING; + + if (nested_cpu_has_preemption_timer(vmcs12) && + vmx->nested.has_preemption_timer_deadline) { + kvm_state.hdr.vmx.flags |= + KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE; + kvm_state.hdr.vmx.preemption_timer_deadline = + vmx->nested.preemption_timer_deadline; + } } } @@ -5821,7 +6009,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, get_shadow_vmcs12(vcpu), VMCS12_SIZE)) return -EFAULT; } - out: return kvm_state.size; } @@ -5844,7 +6031,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12; - u32 exit_qual; + enum vm_entry_failure_code ignored; struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = &user_kvm_nested_state->data.vmx[0]; int ret; @@ -5983,9 +6170,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, goto error_guest_mode; } + if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) { + vmx->nested.has_preemption_timer_deadline = true; + vmx->nested.preemption_timer_deadline = + kvm_state->hdr.vmx.preemption_timer_deadline; + } + if (nested_vmx_check_controls(vcpu, vmcs12) || nested_vmx_check_host_state(vcpu, vmcs12) || - 
nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) + nested_vmx_check_guest_state(vcpu, vmcs12, &ignored)) goto error_guest_mode; vmx->nested.dirty_vmcs12 = true; @@ -6031,7 +6224,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) * reason is that if one of these bits is necessary, it will appear * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control * fields of vmcs01 and vmcs02, will turn these bits off - and - * nested_vmx_exit_reflected() will not pass related exits to L1. + * nested_vmx_l1_wants_exit() will not pass related exits to L1. * These rules have exceptions below. */ @@ -6259,8 +6452,7 @@ void nested_vmx_hardware_unsetup(void) } } -__init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops, - int (*exit_handlers[])(struct kvm_vcpu *)) +__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) { int i; @@ -6296,12 +6488,15 @@ __init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops, exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid; exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc; - ops->check_nested_events = vmx_check_nested_events; - ops->get_nested_state = vmx_get_nested_state; - ops->set_nested_state = vmx_set_nested_state; - ops->get_vmcs12_pages = nested_get_vmcs12_pages; - ops->nested_enable_evmcs = nested_enable_evmcs; - ops->nested_get_evmcs_version = nested_get_evmcs_version; - return 0; } + +struct kvm_x86_nested_ops vmx_nested_ops = { + .check_events = vmx_check_nested_events, + .hv_timer_pending = nested_vmx_preemption_timer_pending, + .get_state = vmx_get_nested_state, + .set_state = vmx_set_nested_state, + .get_vmcs12_pages = nested_get_vmcs12_pages, + .enable_evmcs = nested_enable_evmcs, + .get_evmcs_version = nested_get_evmcs_version, +}; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index ac56aefa49e3..758bccc26cf9 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -19,14 +19,13 @@ enum nvmx_vmentry_status { void vmx_leave_nested(struct kvm_vcpu *vcpu); void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps); void nested_vmx_hardware_unsetup(void); -__init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops, - int (*exit_handlers[])(struct kvm_vcpu *)); +__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); void nested_vmx_set_vmcs_shadowing_bitmap(void); void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu); enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry); -bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason); -void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, +bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu); +void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, u32 exit_intr_info, unsigned long exit_qualification); void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu); int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); @@ -62,6 +61,13 @@ static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu) vmx->nested.hv_evmcs; } +static inline u16 nested_get_vpid02(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid; +} + static inline unsigned long nested_ept_get_eptp(struct kvm_vcpu *vcpu) { /* return the page table to be shadowed - in our case, EPT12 */ @@ -74,34 +80,6 @@ static inline bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) } /* - * Reflect a VM Exit into L1. 
- */ -static inline int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, - u32 exit_reason) -{ - u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - - /* - * At this point, the exit interruption info in exit_intr_info - * is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT - * we need to query the in-kernel LAPIC. - */ - WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT); - if ((exit_intr_info & - (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == - (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - - vmcs12->vm_exit_intr_error_code = - vmcs_read32(VM_EXIT_INTR_ERROR_CODE); - } - - nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, - vmcs_readl(EXIT_QUALIFICATION)); - return 1; -} - -/* * Return the cr0 value that a nested guest would read. This is a combination * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by * its hypervisor (cr0_read_shadow). @@ -246,6 +224,11 @@ static inline bool nested_cpu_has_save_preemption_timer(struct vmcs12 *vmcs12) VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; } +static inline bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) +{ + return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu)); +} + /* * In nested virtualization, check if L1 asked to exit on external interrupts. * For most existing hypervisors, this will always return true. @@ -299,4 +282,6 @@ static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) #define nested_guest_cr4_valid nested_cr4_valid #define nested_host_cr4_valid nested_cr4_valid +extern struct kvm_x86_nested_ops vmx_nested_ops; + #endif /* __KVM_X86_VMX_NESTED_H */ diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h index 19717d0a1100..5f1ac002b4b6 100644 --- a/arch/x86/kvm/vmx/ops.h +++ b/arch/x86/kvm/vmx/ops.h @@ -268,42 +268,38 @@ static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) vmx_asm2(invept, "r"(ext), "m"(operand), ext, eptp, gpa); } -static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr) -{ - if (vpid == 0) - return true; - - if (cpu_has_vmx_invvpid_individual_addr()) { - __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr); - return true; - } - - return false; -} - static inline void vpid_sync_vcpu_single(int vpid) { if (vpid == 0) return; - if (cpu_has_vmx_invvpid_single()) - __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0); + __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0); } static inline void vpid_sync_vcpu_global(void) { - if (cpu_has_vmx_invvpid_global()) - __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); + __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); } static inline void vpid_sync_context(int vpid) { if (cpu_has_vmx_invvpid_single()) vpid_sync_vcpu_single(vpid); - else + else if (vpid != 0) vpid_sync_vcpu_global(); } +static inline void vpid_sync_vcpu_addr(int vpid, gva_t addr) +{ + if (vpid == 0) + return; + + if (cpu_has_vmx_invvpid_individual_addr()) + __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr); + else + vpid_sync_context(vpid); +} + static inline void ept_sync_global(void) { __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 7c857737b438..d33d890b605f 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -18,6 +18,8 @@ #include "nested.h" #include "pmu.h" +#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0) + static struct kvm_event_hw_type_mapping intel_arch_events[] = { /* Index must match CPUID 0x0A.EBX bit vector */ [0] = { 0x3c, 0x00, 
PERF_COUNT_HW_CPU_CYCLES }, @@ -150,6 +152,22 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, return &counters[array_index_nospec(idx, num_counters)]; } +static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu) +{ + if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + return false; + + return vcpu->arch.perf_capabilities & PMU_CAP_FW_WRITES; +} + +static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr) +{ + if (!fw_writes_is_enabled(pmu_to_vcpu(pmu))) + return NULL; + + return get_gp_pmc(pmu, msr, MSR_IA32_PMC0); +} + static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -162,10 +180,13 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) case MSR_CORE_PERF_GLOBAL_OVF_CTRL: ret = pmu->version > 1; break; + case MSR_IA32_PERF_CAPABILITIES: + ret = guest_cpuid_has(vcpu, X86_FEATURE_PDCM); + break; default: ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) || get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) || - get_fixed_pmc(pmu, msr); + get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr); break; } @@ -184,35 +205,45 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) return pmc; } -static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) +static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; + u32 msr = msr_info->index; switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: - *data = pmu->fixed_ctr_ctrl; + msr_info->data = pmu->fixed_ctr_ctrl; return 0; case MSR_CORE_PERF_GLOBAL_STATUS: - *data = pmu->global_status; + msr_info->data = pmu->global_status; return 0; case MSR_CORE_PERF_GLOBAL_CTRL: - *data = pmu->global_ctrl; + msr_info->data = pmu->global_ctrl; return 0; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - *data = pmu->global_ovf_ctrl; + msr_info->data = pmu->global_ovf_ctrl; + return 0; + case MSR_IA32_PERF_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + return 1; + msr_info->data = vcpu->arch.perf_capabilities; return 0; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || + (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { u64 val = pmc_read_counter(pmc); - *data = val & pmu->counter_bitmask[KVM_PMC_GP]; + msr_info->data = + val & pmu->counter_bitmask[KVM_PMC_GP]; return 0; } else if ((pmc = get_fixed_pmc(pmu, msr))) { u64 val = pmc_read_counter(pmc); - *data = val & pmu->counter_bitmask[KVM_PMC_FIXED]; + msr_info->data = + val & pmu->counter_bitmask[KVM_PMC_FIXED]; return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { - *data = pmc->eventsel; + msr_info->data = pmc->eventsel; return 0; } } @@ -258,9 +289,22 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 0; } break; + case MSR_IA32_PERF_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) ? 
+ (data & ~vmx_get_perf_capabilities()) : data) + return 1; + vcpu->arch.perf_capabilities = data; + return 0; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { - if (!msr_info->host_initiated) + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || + (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { + if ((msr & MSR_PMC_FULL_WIDTH_BIT) && + (data & ~pmu->counter_bitmask[KVM_PMC_GP])) + return 1; + if (!msr_info->host_initiated && + !(msr & MSR_PMC_FULL_WIDTH_BIT)) data = (s64)(s32)data; pmc->counter += data - pmc_read_counter(pmc); if (pmc->perf_event) @@ -300,6 +344,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->version = 0; pmu->reserved_bits = 0xffffffff00200000ull; + vcpu->arch.perf_capabilities = 0; entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); if (!entry) @@ -312,6 +357,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) return; perf_get_x86_pmu_capability(&x86_pmu); + if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + vcpu->arch.perf_capabilities = vmx_get_perf_capabilities(); pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, x86_pmu.num_counters_gp); diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 481ad879197b..5c0ff80b85c0 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -19,7 +19,7 @@ struct vmcs_hdr { struct vmcs { struct vmcs_hdr hdr; u32 abort; - char data[0]; + char data[]; }; DECLARE_PER_CPU(struct vmcs *, current_vmcs); diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c index 53dfb401316d..c8e51c004f78 100644 --- a/arch/x86/kvm/vmx/vmcs12.c +++ b/arch/x86/kvm/vmx/vmcs12.c @@ -115,10 +115,6 @@ const unsigned short vmcs_field_to_offset_table[] = { FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), FIELD(CR0_READ_SHADOW, cr0_read_shadow), FIELD(CR4_READ_SHADOW, cr4_read_shadow), - FIELD(CR3_TARGET_VALUE0, cr3_target_value0), - FIELD(CR3_TARGET_VALUE1, cr3_target_value1), - FIELD(CR3_TARGET_VALUE2, cr3_target_value2), - FIELD(CR3_TARGET_VALUE3, cr3_target_value3), FIELD(EXIT_QUALIFICATION, exit_qualification), FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address), FIELD(GUEST_CR0, guest_cr0), diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index d0c6df373f67..80232daf00ff 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -80,10 +80,7 @@ struct __packed vmcs12 { natural_width cr4_guest_host_mask; natural_width cr0_read_shadow; natural_width cr4_read_shadow; - natural_width cr3_target_value0; - natural_width cr3_target_value1; - natural_width cr3_target_value2; - natural_width cr3_target_value3; + natural_width dead_space[4]; /* Last remnants of cr3_target_value[0-3]. 
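intel_pmu_set_msr()/intel_pmu_get_msr() above add the full-width IA32_PMCx aliases: legacy IA32_PERFCTRx guest writes are still sign-extended from 32 bits, while full-width writes must fit the counter width. A standalone sketch of those two paths under an assumed 48-bit counter width:

#include <stdint.h>
#include <stdio.h>

#define COUNTER_WIDTH	48	/* assumed GP counter width for the sketch */
#define COUNTER_MASK	((1ULL << COUNTER_WIDTH) - 1)

/* Returns -1 for a full-width write with bits above the counter width. */
static int write_gp_counter(uint64_t *counter, uint64_t data, int full_width)
{
	if (full_width) {
		if (data & ~COUNTER_MASK)
			return -1;				/* #GP in the guest */
	} else {
		data = (uint64_t)(int64_t)(int32_t)data;	/* sign-extend 32->64 */
	}
	*counter = data & COUNTER_MASK;
	return 0;
}

int main(void)
{
	uint64_t c;

	write_gp_counter(&c, 0x80000000ULL, 0);		/* legacy: sign-extended */
	printf("%#llx\n", (unsigned long long)c);	/* 0xffff80000000        */
	write_gp_counter(&c, 0x800080000000ULL, 1);	/* full-width, in range  */
	printf("%#llx\n", (unsigned long long)c);	/* 0x800080000000        */
	return 0;
}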
*/ natural_width exit_qualification; natural_width guest_linear_address; natural_width guest_cr0; @@ -263,10 +260,7 @@ static inline void vmx_check_vmcs12_offsets(void) CHECK_OFFSET(cr4_guest_host_mask, 352); CHECK_OFFSET(cr0_read_shadow, 360); CHECK_OFFSET(cr4_read_shadow, 368); - CHECK_OFFSET(cr3_target_value0, 376); - CHECK_OFFSET(cr3_target_value1, 384); - CHECK_OFFSET(cr3_target_value2, 392); - CHECK_OFFSET(cr3_target_value3, 400); + CHECK_OFFSET(dead_space, 376); CHECK_OFFSET(exit_qualification, 408); CHECK_OFFSET(guest_linear_address, 416); CHECK_OFFSET(guest_cr0, 424); diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 51d1a82742fd..e0a182cb3cdd 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -166,13 +166,13 @@ SYM_FUNC_START(__vmx_vcpu_run) mov WORD_SIZE(%_ASM_SP), %_ASM_AX /* Save all guest registers, including RAX from the stack */ - __ASM_SIZE(pop) VCPU_RAX(%_ASM_AX) - mov %_ASM_CX, VCPU_RCX(%_ASM_AX) - mov %_ASM_DX, VCPU_RDX(%_ASM_AX) - mov %_ASM_BX, VCPU_RBX(%_ASM_AX) - mov %_ASM_BP, VCPU_RBP(%_ASM_AX) - mov %_ASM_SI, VCPU_RSI(%_ASM_AX) - mov %_ASM_DI, VCPU_RDI(%_ASM_AX) + pop VCPU_RAX(%_ASM_AX) + mov %_ASM_CX, VCPU_RCX(%_ASM_AX) + mov %_ASM_DX, VCPU_RDX(%_ASM_AX) + mov %_ASM_BX, VCPU_RBX(%_ASM_AX) + mov %_ASM_BP, VCPU_RBP(%_ASM_AX) + mov %_ASM_SI, VCPU_RSI(%_ASM_AX) + mov %_ASM_DI, VCPU_RDI(%_ASM_AX) #ifdef CONFIG_X86_64 mov %r8, VCPU_R8 (%_ASM_AX) mov %r9, VCPU_R9 (%_ASM_AX) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 89c766fad889..170cc76a581f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -437,6 +437,11 @@ static const struct kvm_vmx_segment_field { VMX_SEGMENT_FIELD(LDTR), }; +static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) +{ + vmx->segment_cache.bitmask = 0; +} + static unsigned long host_idt_base; /* @@ -1306,10 +1311,12 @@ after_clear_sn: pi_set_on(pi_desc); } -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) +void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, + struct loaded_vmcs *buddy) { struct vcpu_vmx *vmx = to_vmx(vcpu); bool already_loaded = vmx->loaded_vmcs->cpu == cpu; + struct vmcs *prev; if (!already_loaded) { loaded_vmcs_clear(vmx->loaded_vmcs); @@ -1328,16 +1335,28 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) local_irq_enable(); } - if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { + prev = per_cpu(current_vmcs, cpu); + if (prev != vmx->loaded_vmcs->vmcs) { per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; vmcs_load(vmx->loaded_vmcs->vmcs); - indirect_branch_prediction_barrier(); + + /* + * No indirect branch prediction barrier needed when switching + * the active VMCS within a guest, e.g. on nested VM-Enter. + * The L1 VMM can protect itself with retpolines, IBPB or IBRS. + */ + if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev)) + indirect_branch_prediction_barrier(); } if (!already_loaded) { void *gdt = get_current_gdt_ro(); unsigned long sysenter_esp; + /* + * Flush all EPTP/VPID contexts, the new pCPU may have stale + * TLB entries from its previous association with the vCPU. + */ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); /* @@ -1364,11 +1383,11 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. 
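vmx_vcpu_load_vmcs() now takes a "buddy" VMCS so the indirect branch prediction barrier can be skipped when a vCPU merely switches between its own vmcs01 and vmcs02. A minimal model of that decision; the pointers and the helper name here are illustrative only:

#include <stdbool.h>
#include <stdio.h>

struct vmcs;	/* opaque for the sketch */

/*
 * An IBPB is only needed when the previously resident VMCS may belong to a
 * different guest; a switch between one vCPU's own VMCSes cannot leak
 * another guest's branch predictions.
 */
static bool need_ibpb(const struct vmcs *prev_on_cpu, const struct vmcs *buddy)
{
	if (!buddy)
		return true;		/* plain vcpu_load: be conservative  */
	return buddy != prev_on_cpu;	/* unexpected resident VMCS: barrier */
}

int main(void)
{
	struct vmcs *vmcs01 = (struct vmcs *)0x1000;
	struct vmcs *vmcs02 = (struct vmcs *)0x2000;
	struct vmcs *other  = (struct vmcs *)0x3000;

	printf("%d\n", need_ibpb(vmcs01, vmcs01));	/* 0: nested switch, skip  */
	printf("%d\n", need_ibpb(other,  vmcs01));	/* 1: foreign/stale VMCS   */
	printf("%d\n", need_ibpb(vmcs02, (struct vmcs *)0)); /* 1: no buddy known */
	return 0;
}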
*/ -void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - vmx_vcpu_load_vmcs(vcpu, cpu); + vmx_vcpu_load_vmcs(vcpu, cpu, NULL); vmx_vcpu_pi_load(vcpu, cpu); @@ -1546,7 +1565,7 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { - unsigned long rip; + unsigned long rip, orig_rip; /* * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on @@ -1558,8 +1577,17 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) */ if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) { - rip = kvm_rip_read(vcpu); - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + orig_rip = kvm_rip_read(vcpu); + rip = orig_rip + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); +#ifdef CONFIG_X86_64 + /* + * We need to mask out the high 32 bits of RIP if not in 64-bit + * mode, but just finding out that we are in 64-bit mode is + * quite expensive. Only do it if there was a carry. + */ + if (unlikely(((rip ^ orig_rip) >> 31) == 3) && !is_64_bit_mode(vcpu)) + rip = (u32)rip; +#endif kvm_rip_write(vcpu, rip); } else { if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) @@ -1712,17 +1740,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx_update_msr_bitmap(&vmx->vcpu); } -static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - - if (is_guest_mode(vcpu) && - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) - return vcpu->arch.tsc_offset - vmcs12->tsc_offset; - - return vcpu->arch.tsc_offset; -} - static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -1771,6 +1788,9 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr) if (!nested) return 1; return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); + case MSR_IA32_PERF_CAPABILITIES: + msr->data = vmx_get_perf_capabilities(); + return 0; default: return 1; } @@ -1926,6 +1946,16 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 0; } +static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu, + u64 data) +{ +#ifdef CONFIG_X86_64 + if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) + return (u32)data; +#endif + return (unsigned long)data; +} + /* * Writes msr value into the appropriate "register". * Returns 0 on success, non-0 otherwise. 
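skip_emulated_instruction() above masks the advanced RIP back to 32 bits only when the addition carried out of bit 31, using ((rip ^ orig_rip) >> 31) == 3 as the cheap carry test: a small instruction length can only flip bits 31 and 32 together when the low 32 bits wrapped. A short standalone demonstration of that expression:

#include <stdint.h>
#include <stdio.h>

static uint64_t advance_rip(uint64_t rip, uint32_t insn_len, int long_mode)
{
	uint64_t new_rip = rip + insn_len;

	/* Carry out of bit 31: old has bit31=1/bit32=0, new has bit31=0/bit32=1. */
	if (((rip ^ new_rip) >> 31) == 3 && !long_mode)
		new_rip = (uint32_t)new_rip;	/* 32-bit mode: EIP wraps */
	return new_rip;
}

int main(void)
{
	/* 32-bit guest at the top of the address space: wraps to 0x1 */
	printf("%#llx\n", (unsigned long long)advance_rip(0xfffffffeULL, 3, 0));
	/* 64-bit guest: no masking, RIP becomes 0x100000001 */
	printf("%#llx\n", (unsigned long long)advance_rip(0xfffffffeULL, 3, 1));
	return 0;
}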
@@ -1963,13 +1993,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmcs_write32(GUEST_SYSENTER_CS, data); break; case MSR_IA32_SYSENTER_EIP: - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + data = nested_vmx_truncate_sysenter_addr(vcpu, data); get_vmcs12(vcpu)->guest_sysenter_eip = data; + } vmcs_writel(GUEST_SYSENTER_EIP, data); break; case MSR_IA32_SYSENTER_ESP: - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + data = nested_vmx_truncate_sysenter_addr(vcpu, data); get_vmcs12(vcpu)->guest_sysenter_esp = data; + } vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_DEBUGCTLMSR: @@ -2187,6 +2221,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { + unsigned long guest_owned_bits; + kvm_register_mark_available(vcpu, reg); switch (reg) { @@ -2200,10 +2236,22 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) if (enable_ept) ept_save_pdptrs(vcpu); break; + case VCPU_EXREG_CR0: + guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; + + vcpu->arch.cr0 &= ~guest_owned_bits; + vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits; + break; case VCPU_EXREG_CR3: if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); break; + case VCPU_EXREG_CR4: + guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; + + vcpu->arch.cr4 &= ~guest_owned_bits; + vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits; + break; default: WARN_ON_ONCE(1); break; @@ -2837,34 +2885,64 @@ static void exit_lmode(struct kvm_vcpu *vcpu) #endif -static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) +static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) { - int vpid = to_vmx(vcpu)->vpid; - - if (!vpid_sync_vcpu_addr(vpid, addr)) - vpid_sync_context(vpid); + struct vcpu_vmx *vmx = to_vmx(vcpu); /* - * If VPIDs are not supported or enabled, then the above is a no-op. - * But we don't really need a TLB flush in that case anyway, because - * each VM entry/exit includes an implicit flush when VPID is 0. + * INVEPT must be issued when EPT is enabled, irrespective of VPID, as + * the CPU is not required to invalidate guest-physical mappings on + * VM-Entry, even if VPID is disabled. Guest-physical mappings are + * associated with the root EPT structure and not any particular VPID + * (INVVPID also isn't required to invalidate guest-physical mappings). */ + if (enable_ept) { + ept_sync_global(); + } else if (enable_vpid) { + if (cpu_has_vmx_invvpid_global()) { + vpid_sync_vcpu_global(); + } else { + vpid_sync_vcpu_single(vmx->vpid); + vpid_sync_vcpu_single(vmx->nested.vpid02); + } + } } -static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) +static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) { - ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; + u64 root_hpa = vcpu->arch.mmu->root_hpa; - vcpu->arch.cr0 &= ~cr0_guest_owned_bits; - vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; + /* No flush required if the current context is invalid. 
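vmx_cache_reg() below reconstructs CR0 and CR4 on demand by combining the cached software value with the guest-owned bits read from hardware. The masking itself is a one-liner, shown here as a standalone sketch with CR0.TS as the example guest-owned bit:

#include <stdint.h>
#include <stdio.h>

/* Guest-owned bits come from the live register, the rest from the cache. */
static uint64_t read_cr(uint64_t cached, uint64_t hw, uint64_t guest_owned)
{
	return (cached & ~guest_owned) | (hw & guest_owned);
}

int main(void)
{
	uint64_t cached = 0x80050033;	/* what the hypervisor last wrote   */
	uint64_t hw     = 0x8005003b;	/* guest set TS (bit 3) on its own  */

	printf("%#llx\n", (unsigned long long)read_cr(cached, hw, 1ULL << 3));
	/* prints 0x8005003b: TS taken from hardware, everything else cached */
	return 0;
}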
*/ + if (!VALID_PAGE(root_hpa)) + return; + + if (enable_ept) + ept_sync_context(construct_eptp(vcpu, root_hpa)); + else if (!is_guest_mode(vcpu)) + vpid_sync_context(to_vmx(vcpu)->vpid); + else + vpid_sync_context(nested_get_vpid02(vcpu)); } -static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) +static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) { - ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; + /* + * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in + * vmx_flush_tlb_guest() for an explanation of why this is ok. + */ + vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr); +} - vcpu->arch.cr4 &= ~cr4_guest_owned_bits; - vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits; +static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) +{ + /* + * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0 + * or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit + * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is + * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed), + * i.e. no explicit INVVPID is necessary. + */ + vpid_sync_context(to_vmx(vcpu)->vpid); } static void ept_load_pdptrs(struct kvm_vcpu *vcpu) @@ -2886,12 +2964,13 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - if (is_pae_paging(vcpu)) { - mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); - mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); - mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); - mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); - } + if (WARN_ON_ONCE(!is_pae_paging(vcpu))) + return; + + mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); + mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); + mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); + mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } @@ -2955,20 +3034,27 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; + kvm_register_mark_available(vcpu, VCPU_EXREG_CR0); /* depends on vcpu->arch.cr0 to be set to a new value */ vmx->emulation_required = emulation_required(vcpu); } -static int get_ept_level(struct kvm_vcpu *vcpu) +static int vmx_get_tdp_level(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) - return vmx_eptp_page_walk_level(nested_ept_get_eptp(vcpu)); if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; } +static int get_ept_level(struct kvm_vcpu *vcpu) +{ + if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) + return vmx_eptp_page_walk_level(nested_ept_get_eptp(vcpu)); + + return vmx_get_tdp_level(vcpu); +} + u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) { u64 eptp = VMX_EPTP_MT_WB; @@ -2983,16 +3069,15 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) return eptp; } -void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long cr3) +void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd) { struct kvm *kvm = vcpu->kvm; bool update_guest_cr3 = true; unsigned long guest_cr3; u64 eptp; - guest_cr3 = cr3; if (enable_ept) { - eptp = construct_eptp(vcpu, cr3); + eptp = construct_eptp(vcpu, pgd); vmcs_write64(EPT_POINTER, eptp); if (kvm_x86_ops.tlb_remote_flush) { @@ -3003,16 +3088,15 @@ void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long cr3) spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); } - /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. 
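vmx_get_tdp_level()/get_ept_level()/construct_eptp() above pick 5-level EPT only when the CPU supports it and the guest's MAXPHYADDR exceeds 48, and pack the walk length, WB memory type and A/D-enable bit into the EPTP. A self-contained sketch of that packing; the field positions follow the SDM's EPTP layout, but the macros are local to the example:

#include <stdint.h>
#include <stdio.h>

#define EPTP_MT_WB	6ULL		/* memory type, bits 2:0      */
#define EPTP_AD_ENABLE	(1ULL << 6)	/* accessed/dirty enable      */

static int ept_levels(int maxphyaddr, int have_5level)
{
	return (have_5level && maxphyaddr > 48) ? 5 : 4;
}

static uint64_t make_eptp(uint64_t root_hpa, int levels, int enable_ad)
{
	uint64_t eptp = EPTP_MT_WB;

	eptp |= (uint64_t)(levels - 1) << 3;	/* page-walk length - 1, bits 5:3 */
	if (enable_ad)
		eptp |= EPTP_AD_ENABLE;
	return eptp | (root_hpa & ~0xfffULL);	/* 4KiB-aligned root              */
}

int main(void)
{
	printf("%d-level\n", ept_levels(52, 1));	/* 5-level          */
	printf("%#llx\n", (unsigned long long)
	       make_eptp(0x12345000ULL, 4, 1));		/* 0x1234505e       */
	return 0;
}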
*/ - if (is_guest_mode(vcpu)) - update_guest_cr3 = false; - else if (!enable_unrestricted_guest && !is_paging(vcpu)) + if (!enable_unrestricted_guest && !is_paging(vcpu)) guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) guest_cr3 = vcpu->arch.cr3; else /* vmcs01.GUEST_CR3 is already up-to-date. */ update_guest_cr3 = false; ept_load_pdptrs(vcpu); + } else { + guest_cr3 = pgd; } if (update_guest_cr3) @@ -3063,6 +3147,7 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; vcpu->arch.cr4 = cr4; + kvm_register_mark_available(vcpu, VCPU_EXREG_CR4); if (!enable_unrestricted_guest) { if (enable_ept) { @@ -3851,7 +3936,8 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) if (pi_test_and_set_on(&vmx->pi_desc)) return 0; - if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) + if (vcpu != kvm_get_running_vcpu() && + !kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); return 0; @@ -4147,8 +4233,7 @@ static void ept_set_mmio_spte_mask(void) * EPT Misconfigurations can be generated if the value of bits 2:0 * of an EPT paging-structure entry is 110b (write/execute). */ - kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, - VMX_EPT_MISCONFIG_WX_VALUE, 0); + kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, 0); } #define VMX_XSS_EXIT_BITMAP 0 @@ -4453,31 +4538,54 @@ void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) } } -static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) +bool vmx_nmi_blocked(struct kvm_vcpu *vcpu) { - if (to_vmx(vcpu)->nested.nested_run_pending) - return 0; + if (is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu)) + return false; - if (!enable_vnmi && - to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) - return 0; + if (!enable_vnmi && to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) + return true; - return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI - | GUEST_INTR_STATE_NMI)); + return (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_NMI)); } -static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) +static int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { if (to_vmx(vcpu)->nested.nested_run_pending) - return false; + return -EBUSY; + + /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu)) + return -EBUSY; + + return !vmx_nmi_blocked(vcpu); +} +bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu) +{ if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) - return true; + return false; - return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); + return !(vmx_get_rflags(vcpu) & X86_EFLAGS_IF) || + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); +} + +static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) +{ + if (to_vmx(vcpu)->nested.nested_run_pending) + return -EBUSY; + + /* + * An IRQ must not be injected into L2 if it's supposed to VM-Exit, + * e.g. if the IRQ arrived asynchronously after checking nested events. 
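The *_allowed() callbacks above are split from new *_blocked() helpers: "blocked" only reports whether hardware could deliver the event right now, while "allowed" layers the nested rules on top and can defer with -EBUSY while a nested VM-Enter is pending or a VM-Exit to L1 is required first. A toy model of that layering with an invented vcpu struct and a placeholder errno value:

#include <stdbool.h>
#include <stdio.h>

#define EBUSY 16	/* placeholder value for the sketch */

struct vcpu {
	bool nested_run_pending;	/* VM-Enter in flight, can't inject  */
	bool in_guest_mode;		/* running L2                        */
	bool l1_intercepts_event;	/* vmcs12 wants a VM-Exit for it     */
	bool hw_blocked;		/* STI/MOV-SS shadow, RFLAGS.IF, ... */
};

static bool event_blocked(const struct vcpu *v)
{
	if (v->in_guest_mode && v->l1_intercepts_event)
		return false;	/* would cause a VM-Exit, not an injection */
	return v->hw_blocked;
}

static int event_allowed(const struct vcpu *v, bool for_injection)
{
	if (v->nested_run_pending)
		return -EBUSY;
	if (for_injection && v->in_guest_mode && v->l1_intercepts_event)
		return -EBUSY;	/* must VM-Exit to L1 instead of injecting */
	return !event_blocked(v);
}

int main(void)
{
	struct vcpu v = { false, true, true, true };

	printf("%d %d\n", event_blocked(&v), event_allowed(&v, true)); /* 0 -16 */
	return 0;
}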
+ */ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) + return -EBUSY; + + return !vmx_interrupt_blocked(vcpu); } static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -4518,10 +4626,8 @@ static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) return false; /* fall through */ case DB_VECTOR: - if (vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - return false; - /* fall through */ + return !(vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)); case DE_VECTOR: case OF_VECTOR: case BR_VECTOR: @@ -4616,7 +4722,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) u32 vect_info; vect_info = vmx->idt_vectoring_info; - intr_info = vmx->exit_intr_info; + intr_info = vmx_get_intr_info(vcpu); if (is_machine_check(intr_info) || is_nmi(intr_info)) return 1; /* handled by handle_exception_nmi_irqoff() */ @@ -4660,9 +4766,9 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) } if (is_page_fault(intr_info)) { - cr2 = vmcs_readl(EXIT_QUALIFICATION); + cr2 = vmx_get_exit_qual(vcpu); /* EPT won't cause page fault directly */ - WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept); + WARN_ON_ONCE(!vcpu->arch.apf.host_apf_flags && enable_ept); return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0); } @@ -4673,7 +4779,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) switch (ex_no) { case DB_VECTOR: - dr6 = vmcs_readl(EXIT_QUALIFICATION); + dr6 = vmx_get_exit_qual(vcpu); if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { if (is_icebp(intr_info)) @@ -4740,7 +4846,7 @@ static int handle_io(struct kvm_vcpu *vcpu) int size, in, string; unsigned port; - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); string = (exit_qualification & 16) != 0; ++vcpu->stat.io_exits; @@ -4831,7 +4937,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) int err; int ret; - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); cr = exit_qualification & 15; reg = (exit_qualification >> 8) & 15; switch ((exit_qualification >> 4) & 3) { @@ -4908,7 +5014,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) unsigned long exit_qualification; int dr, dr7, reg; - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); dr = exit_qualification & DEBUG_REG_ACCESS_NUM; /* First, if DR does not exist, trigger UD */ @@ -5010,7 +5116,7 @@ static int handle_invd(struct kvm_vcpu *vcpu) static int handle_invlpg(struct kvm_vcpu *vcpu) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); kvm_mmu_invlpg(vcpu, exit_qualification); return kvm_skip_emulated_instruction(vcpu); @@ -5042,7 +5148,7 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) static int handle_apic_access(struct kvm_vcpu *vcpu) { if (likely(fasteoi)) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); int access_type, offset; access_type = exit_qualification & APIC_ACCESS_TYPE; @@ -5063,7 +5169,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); int vector = exit_qualification & 0xff; /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ @@ 
-5073,7 +5179,7 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) static int handle_apic_write(struct kvm_vcpu *vcpu) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); u32 offset = exit_qualification & 0xfff; /* APIC-write VM exit is trap-like and thus no need to adjust IP */ @@ -5094,7 +5200,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK); type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); reason = (u32)exit_qualification >> 30; if (reason == TASK_SWITCH_GATE && idt_v) { @@ -5144,7 +5250,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) gpa_t gpa; u64 error_code; - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); /* * EPT violation happened while executing iret from NMI, @@ -5216,18 +5322,11 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) bool intr_window_requested; unsigned count = 130; - /* - * We should never reach the point where we are emulating L2 - * due to invalid guest state as that means we incorrectly - * allowed a nested VMEntry with an invalid vmcs12. - */ - WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); - intr_window_requested = exec_controls_get(vmx) & CPU_BASED_INTR_WINDOW_EXITING; while (vmx->emulation_required && count-- != 0) { - if (intr_window_requested && vmx_interrupt_allowed(vcpu)) + if (intr_window_requested && !vmx_interrupt_blocked(vcpu)) return handle_interrupt_window(&vmx->vcpu); if (kvm_test_request(KVM_REQ_EVENT, vcpu)) @@ -5404,13 +5503,13 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) /* According to the Intel instruction reference, the memory operand * is read even if it isn't needed (e.g., for type==all) */ - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), vmx_instruction_info, false, sizeof(operand), &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } @@ -5439,11 +5538,11 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) if (kvm_get_active_pcid(vcpu) == operand.pcid) { kvm_mmu_sync_roots(vcpu); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3) + if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd) == operand.pcid) roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); @@ -5480,7 +5579,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) trace_kvm_pml_full(vcpu->vcpu_id); - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); /* * PML buffer FULL happened while executing iret from NMI, @@ -5499,14 +5598,22 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) return 1; } -static int handle_preemption_timer(struct kvm_vcpu *vcpu) +static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); if (!vmx->req_immediate_exit && - !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) + !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) { kvm_lapic_expired_hv_timer(vcpu); + return EXIT_FASTPATH_REENTER_GUEST; + } + + return 
EXIT_FASTPATH_NONE; +} +static int handle_preemption_timer(struct kvm_vcpu *vcpu) +{ + handle_fastpath_preemption_timer(vcpu); return 1; } @@ -5594,8 +5701,8 @@ static const int kvm_vmx_max_exit_handlers = static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) { - *info1 = vmcs_readl(EXIT_QUALIFICATION); - *info2 = vmcs_read32(VM_EXIT_INTR_INFO); + *info1 = vmx_get_exit_qual(vcpu); + *info2 = vmx_get_intr_info(vcpu); } static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) @@ -5677,7 +5784,6 @@ void dump_vmcs(void) u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control; unsigned long cr4; u64 efer; - int i, n; if (!dump_invalid_vmcs) { pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n"); @@ -5814,14 +5920,6 @@ void dump_vmcs(void) pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); - n = vmcs_read32(CR3_TARGET_COUNT); - for (i = 0; i + 1 < n; i += 4) - pr_err("CR3 target%u=%016lx target%u=%016lx\n", - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2), - i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2)); - if (i < n) - pr_err("CR3 target%u=%016lx\n", - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2)); if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) pr_err("PLE Gap=%08x Window=%08x\n", vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW)); @@ -5834,15 +5932,12 @@ void dump_vmcs(void) * The guest has exited. See if we can fix it or if we need userspace * assistance. */ -static int vmx_handle_exit(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion exit_fastpath) +static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exit_reason = vmx->exit_reason; u32 vectoring_info = vmx->idt_vectoring_info; - trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); - /* * Flush logged GPAs PML buffer, this will make dirty_bitmap more * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before @@ -5853,6 +5948,14 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, if (enable_pml) vmx_flush_pml_buffer(vcpu); + /* + * We should never reach this point with a pending nested VM-Enter, and + * more specifically emulation of L2 due to invalid guest state (see + * below) should never happen as that means we incorrectly allowed a + * nested VM-Enter with an invalid vmcs12. 
+ */ + WARN_ON_ONCE(vmx->nested.nested_run_pending); + /* If guest state is invalid, start emulating */ if (vmx->emulation_required) return handle_invalid_guest_state(vcpu); @@ -5871,8 +5974,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, */ nested_mark_vmcs12_pages_dirty(vcpu); - if (nested_vmx_exit_reflected(vcpu, exit_reason)) - return nested_vmx_reflect_vmexit(vcpu, exit_reason); + if (nested_vmx_reflect_vmexit(vcpu)) + return 1; } if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { @@ -5919,7 +6022,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, if (unlikely(!enable_vnmi && vmx->loaded_vmcs->soft_vnmi_blocked)) { - if (vmx_interrupt_allowed(vcpu)) { + if (!vmx_interrupt_blocked(vcpu)) { vmx->loaded_vmcs->soft_vnmi_blocked = 0; } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && vcpu->arch.nmi_pending) { @@ -5936,10 +6039,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, } } - if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { - kvm_skip_emulated_instruction(vcpu); + if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; - } if (exit_reason >= kvm_vmx_max_exit_handlers) goto unexpected_vmexit; @@ -6093,7 +6194,15 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) if (flexpriority_enabled) { sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmx_flush_tlb(vcpu, true); + kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); + + /* + * Flush the TLB, reloading the APIC access page will + * only do so if its physical address has changed, but + * the guest may have inserted a non-APIC mapping into + * the TLB while the APIC access page was disabled. + */ + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } break; case LAPIC_MODE_X2APIC: @@ -6107,12 +6216,32 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) vmx_update_msr_bitmap(vcpu); } -static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) +static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) { - if (!is_guest_mode(vcpu)) { - vmcs_write64(APIC_ACCESS_ADDR, hpa); - vmx_flush_tlb(vcpu, true); + struct page *page; + + /* Defer reload until vmcs01 is the current VMCS. */ + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.reload_vmcs01_apic_access_page = true; + return; } + + if (!(secondary_exec_controls_get(to_vmx(vcpu)) & + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + return; + + page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (is_error_page(page)) + return; + + vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page)); + vmx_flush_tlb_current(vcpu); + + /* + * Do not pin apic access page in memory, the MMU notifier + * will call us again if it is migrated or swapped out. 
+ */ + put_page(page); } static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) @@ -6230,16 +6359,16 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) { - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + u32 intr_info = vmx_get_intr_info(&vmx->vcpu); /* if exit due to PF check for async PF */ - if (is_page_fault(vmx->exit_intr_info)) { - vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); + if (is_page_fault(intr_info)) { + vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags(); /* Handle machine checks before interrupts are enabled */ - } else if (is_machine_check(vmx->exit_intr_info)) { + } else if (is_machine_check(intr_info)) { kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ - } else if (is_nmi(vmx->exit_intr_info)) { + } else if (is_nmi(intr_info)) { kvm_before_interrupt(&vmx->vcpu); asm("int $2"); kvm_after_interrupt(&vmx->vcpu); @@ -6254,9 +6383,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) unsigned long tmp; #endif gate_desc *desc; - u32 intr_info; + u32 intr_info = vmx_get_intr_info(vcpu); - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); if (WARN_ONCE(!is_external_intr(intr_info), "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) return; @@ -6269,13 +6397,13 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) asm volatile( #ifdef CONFIG_X86_64 - "mov %%" _ASM_SP ", %[sp]\n\t" - "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" - "push $%c[ss]\n\t" + "mov %%rsp, %[sp]\n\t" + "and $-16, %%rsp\n\t" + "push %[ss]\n\t" "push %[sp]\n\t" #endif "pushf\n\t" - __ASM_SIZE(push) " $%c[cs]\n\t" + "push %[cs]\n\t" CALL_NOSPEC : #ifdef CONFIG_X86_64 @@ -6284,7 +6412,9 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) ASM_CALL_CONSTRAINT : [thunk_target]"r"(entry), +#ifdef CONFIG_X86_64 [ss]"i"(__KERNEL_DS), +#endif [cs]"i"(__KERNEL_CS) ); @@ -6292,8 +6422,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff); -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion *exit_fastpath) +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6301,12 +6430,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu, handle_external_interrupt_irqoff(vcpu); else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) handle_exception_nmi_irqoff(vmx); - else if (!is_guest_mode(vcpu) && - vmx->exit_reason == EXIT_REASON_MSR_WRITE) - *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); } -static bool vmx_has_emulated_msr(int index) +static bool vmx_has_emulated_msr(u32 index) { switch (index) { case MSR_IA32_SMBASE: @@ -6337,11 +6463,8 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) if (enable_vnmi) { if (vmx->loaded_vmcs->nmi_known_unmasked) return; - /* - * Can't use vmx->exit_intr_info since we're not sure what - * the exit reason is. 
- */ - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + exit_intr_info = vmx_get_intr_info(&vmx->vcpu); unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; vector = exit_intr_info & INTR_INFO_VECTOR_MASK; /* @@ -6508,13 +6631,27 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } +static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) +{ + switch (to_vmx(vcpu)->exit_reason) { + case EXIT_REASON_MSR_WRITE: + return handle_fastpath_set_msr_irqoff(vcpu); + case EXIT_REASON_PREEMPTION_TIMER: + return handle_fastpath_preemption_timer(vcpu); + default: + return EXIT_FASTPATH_NONE; + } +} + bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); -static void vmx_vcpu_run(struct kvm_vcpu *vcpu) +static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) { + fastpath_t exit_fastpath; struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long cr3, cr4; +reenter_guest: /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!enable_vnmi && vmx->loaded_vmcs->soft_vnmi_blocked)) @@ -6523,7 +6660,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Don't enter VMX if guest state is invalid, let the exit handler start emulation until we arrive back to a valid state */ if (vmx->emulation_required) - return; + return EXIT_FASTPATH_NONE; if (vmx->ple_window_dirty) { vmx->ple_window_dirty = false; @@ -6643,12 +6780,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) loadsegment(es, __USER_DS); #endif - vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) - | (1 << VCPU_EXREG_RFLAGS) - | (1 << VCPU_EXREG_PDPTR) - | (1 << VCPU_EXREG_SEGMENTS) - | (1 << VCPU_EXREG_CR3)); - vcpu->arch.regs_dirty = 0; + vmx_register_cache_reset(vcpu); pt_guest_exit(vmx); @@ -6657,18 +6789,45 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->nested.nested_run_pending = 0; vmx->idt_vectoring_info = 0; - vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON); - if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) + if (unlikely(vmx->fail)) { + vmx->exit_reason = 0xdead; + return EXIT_FASTPATH_NONE; + } + + vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); + if (unlikely((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)) kvm_machine_check(); - if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) - return; + trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); + + if (unlikely(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) + return EXIT_FASTPATH_NONE; vmx->loaded_vmcs->launched = 1; vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); + + if (is_guest_mode(vcpu)) + return EXIT_FASTPATH_NONE; + + exit_fastpath = vmx_exit_handlers_fastpath(vcpu); + if (exit_fastpath == EXIT_FASTPATH_REENTER_GUEST) { + if (!kvm_vcpu_exit_request(vcpu)) { + /* + * FIXME: this goto should be a loop in vcpu_enter_guest, + * but it would incur the cost of a retpoline for now. + * Revisit once static calls are available. 
+ */ + if (vcpu->arch.apicv_active) + vmx_sync_pir_to_irr(vcpu); + goto reenter_guest; + } + exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; + } + + return exit_fastpath; } static void vmx_free_vcpu(struct kvm_vcpu *vcpu) @@ -7138,6 +7297,9 @@ static __init void vmx_set_cpu_caps(void) /* CPUID 0x80000001 */ if (!cpu_has_vmx_rdtscp()) kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); + + if (vmx_waitpkg_supported()) + kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG); } static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) @@ -7253,10 +7415,6 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer; - if (kvm_mwait_in_guest(vcpu->kvm) || - kvm_can_post_timer_interrupt(vcpu)) - return -EOPNOTSUPP; - vmx = to_vmx(vcpu); tscl = rdtsc(); guest_tscl = kvm_read_l1_tsc(vcpu, tscl); @@ -7599,12 +7757,12 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) ~FEAT_CTL_LMCE_ENABLED; } -static int vmx_smi_allowed(struct kvm_vcpu *vcpu) +static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { /* we need a nested vmexit to enter SMM, postpone if run is pending */ if (to_vmx(vcpu)->nested.nested_run_pending) - return 0; - return 1; + return -EBUSY; + return !is_smm(vcpu); } static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) @@ -7641,9 +7799,9 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) return 0; } -static int enable_smi_window(struct kvm_vcpu *vcpu) +static void enable_smi_window(struct kvm_vcpu *vcpu) { - return 0; + /* RSM will cause a vmexit anyway. */ } static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) @@ -7656,6 +7814,16 @@ static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) return to_vmx(vcpu)->nested.vmxon; } +static void vmx_migrate_timers(struct kvm_vcpu *vcpu) +{ + if (is_guest_mode(vcpu)) { + struct hrtimer *timer = &to_vmx(vcpu)->nested.preemption_timer; + + if (hrtimer_try_to_cancel(timer) == 1) + hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); + } +} + static void hardware_unsetup(void) { if (nested) @@ -7700,8 +7868,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .set_segment = vmx_set_segment, .get_cpl = vmx_get_cpl, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, - .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, - .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, .set_cr0 = vmx_set_cr0, .set_cr4 = vmx_set_cr4, .set_efer = vmx_set_efer, @@ -7715,8 +7881,10 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, - .tlb_flush = vmx_flush_tlb, + .tlb_flush_all = vmx_flush_tlb_all, + .tlb_flush_current = vmx_flush_tlb_current, .tlb_flush_gva = vmx_flush_tlb_gva, + .tlb_flush_guest = vmx_flush_tlb_guest, .run = vmx_vcpu_run, .handle_exit = vmx_handle_exit, @@ -7751,7 +7919,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .set_tss_addr = vmx_set_tss_addr, .set_identity_map_addr = vmx_set_identity_map_addr, - .get_tdp_level = get_ept_level, + .get_tdp_level = vmx_get_tdp_level, .get_mt_mask = vmx_get_mt_mask, .get_exit_info = vmx_get_exit_info, @@ -7760,7 +7928,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, - .read_l1_tsc_offset = vmx_read_l1_tsc_offset, .write_l1_tsc_offset = vmx_write_l1_tsc_offset, .load_mmu_pgd = vmx_load_mmu_pgd, @@ -7782,6 +7949,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .post_block = vmx_post_block, 
.pmu_ops = &intel_pmu_ops, + .nested_ops = &vmx_nested_ops, .update_pi_irte = vmx_update_pi_irte, @@ -7797,14 +7965,9 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .pre_leave_smm = vmx_pre_leave_smm, .enable_smi_window = enable_smi_window, - .check_nested_events = NULL, - .get_nested_state = NULL, - .set_nested_state = NULL, - .get_vmcs12_pages = NULL, - .nested_enable_evmcs = NULL, - .nested_get_evmcs_version = NULL, .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, .apic_init_signal_blocked = vmx_apic_init_signal_blocked, + .migrate_timers = vmx_migrate_timers, }; static __init int hardware_setup(void) @@ -7903,11 +8066,11 @@ static __init int hardware_setup(void) if (!enable_ept) ept_lpage_level = 0; else if (cpu_has_vmx_ept_1g_page()) - ept_lpage_level = PT_PDPE_LEVEL; + ept_lpage_level = PG_LEVEL_1G; else if (cpu_has_vmx_ept_2m_page()) - ept_lpage_level = PT_DIRECTORY_LEVEL; + ept_lpage_level = PG_LEVEL_2M; else - ept_lpage_level = PT_PAGE_TABLE_LEVEL; + ept_lpage_level = PG_LEVEL_4K; kvm_configure_mmu(enable_ept, ept_lpage_level); /* @@ -7967,8 +8130,7 @@ static __init int hardware_setup(void) nested_vmx_setup_ctls_msrs(&vmcs_config.nested, vmx_capability.ept); - r = nested_vmx_hardware_setup(&vmx_x86_ops, - kvm_vmx_exit_handlers); + r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); if (r) return r; } diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index aab9df55336e..672c28f17e49 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -8,6 +8,7 @@ #include <asm/intel_pt.h> #include "capabilities.h" +#include "kvm_cache_regs.h" #include "ops.h" #include "vmcs.h" @@ -136,6 +137,7 @@ struct nested_vmx { bool vmcs02_initialized; bool change_vmcs01_virtual_apic_mode; + bool reload_vmcs01_apic_access_page; /* * Enlightened VMCS has been enabled. 
It does not mean that L1 has to @@ -167,6 +169,8 @@ struct nested_vmx { u16 posted_intr_nv; struct hrtimer preemption_timer; + u64 preemption_timer_deadline; + bool has_preemption_timer_deadline; bool preemption_timer_expired; /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ @@ -208,6 +212,7 @@ struct vcpu_vmx { */ bool guest_state_loaded; + unsigned long exit_qualification; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -317,8 +322,8 @@ struct kvm_vmx { }; bool nested_vmx_allowed(struct kvm_vcpu *vcpu); -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu); -void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, + struct loaded_vmcs *buddy); int allocate_vpid(void); void free_vpid(int vpid); void vmx_set_constant_host_state(struct vcpu_vmx *vmx); @@ -341,6 +346,8 @@ void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); void update_exception_bitmap(struct kvm_vcpu *vcpu); void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +bool vmx_nmi_blocked(struct kvm_vcpu *vcpu); +bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu); bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); @@ -441,9 +448,18 @@ BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL) BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL) BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL) -static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) +static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu) { - vmx->segment_cache.bitmask = 0; + vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) + | (1 << VCPU_EXREG_RFLAGS) + | (1 << VCPU_EXREG_PDPTR) + | (1 << VCPU_EXREG_SEGMENTS) + | (1 << VCPU_EXREG_CR0) + | (1 << VCPU_EXREG_CR3) + | (1 << VCPU_EXREG_CR4) + | (1 << VCPU_EXREG_EXIT_INFO_1) + | (1 << VCPU_EXREG_EXIT_INFO_2)); + vcpu->arch.regs_dirty = 0; } static inline u32 vmx_vmentry_ctrl(void) @@ -486,6 +502,28 @@ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) return &(to_vmx(vcpu)->pi_desc); } +static inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!kvm_register_is_available(vcpu, VCPU_EXREG_EXIT_INFO_1)) { + kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1); + vmx->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + } + return vmx->exit_qualification; +} + +static inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!kvm_register_is_available(vcpu, VCPU_EXREG_EXIT_INFO_2)) { + kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2); + vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + } + return vmx->exit_intr_info; +} + struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags); void free_vmcs(struct vmcs *vmcs); int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); @@ -500,24 +538,6 @@ static inline struct vmcs *alloc_vmcs(bool shadow) u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); -static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid, - bool invalidate_gpa) -{ - if (enable_ept && (invalidate_gpa || !enable_vpid)) { - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - return; - ept_sync_context(construct_eptp(vcpu, - vcpu->arch.mmu->root_hpa)); - } else { - vpid_sync_context(vpid); - } -} - -static inline void 
vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) -{ - __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa); -} - static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx) { vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c17e6eb9ad43..9e41b5135340 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -18,6 +18,7 @@ #include <linux/kvm_host.h> #include "irq.h" +#include "ioapic.h" #include "mmu.h" #include "i8254.h" #include "tss.h" @@ -97,9 +98,6 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; -#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ -#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ - #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) @@ -194,45 +192,46 @@ u64 __read_mostly supported_xss; EXPORT_SYMBOL_GPL(supported_xss); struct kvm_stats_debugfs_item debugfs_entries[] = { - { "pf_fixed", VCPU_STAT(pf_fixed) }, - { "pf_guest", VCPU_STAT(pf_guest) }, - { "tlb_flush", VCPU_STAT(tlb_flush) }, - { "invlpg", VCPU_STAT(invlpg) }, - { "exits", VCPU_STAT(exits) }, - { "io_exits", VCPU_STAT(io_exits) }, - { "mmio_exits", VCPU_STAT(mmio_exits) }, - { "signal_exits", VCPU_STAT(signal_exits) }, - { "irq_window", VCPU_STAT(irq_window_exits) }, - { "nmi_window", VCPU_STAT(nmi_window_exits) }, - { "halt_exits", VCPU_STAT(halt_exits) }, - { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, - { "hypercalls", VCPU_STAT(hypercalls) }, - { "request_irq", VCPU_STAT(request_irq_exits) }, - { "irq_exits", VCPU_STAT(irq_exits) }, - { "host_state_reload", VCPU_STAT(host_state_reload) }, - { "fpu_reload", VCPU_STAT(fpu_reload) }, - { "insn_emulation", VCPU_STAT(insn_emulation) }, - { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, - { "irq_injections", VCPU_STAT(irq_injections) }, - { "nmi_injections", VCPU_STAT(nmi_injections) }, - { "req_event", VCPU_STAT(req_event) }, - { "l1d_flush", VCPU_STAT(l1d_flush) }, - { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, - { "mmu_pte_write", VM_STAT(mmu_pte_write) }, - { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, - { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) }, - { "mmu_flooded", VM_STAT(mmu_flooded) }, - { "mmu_recycled", VM_STAT(mmu_recycled) }, - { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, - { "mmu_unsync", VM_STAT(mmu_unsync) }, - { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, - { "largepages", VM_STAT(lpages, .mode = 0444) }, - { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) }, - { "max_mmu_page_hash_collisions", - VM_STAT(max_mmu_page_hash_collisions) }, + VCPU_STAT("pf_fixed", pf_fixed), + VCPU_STAT("pf_guest", pf_guest), + VCPU_STAT("tlb_flush", tlb_flush), + VCPU_STAT("invlpg", invlpg), + VCPU_STAT("exits", exits), + VCPU_STAT("io_exits", io_exits), + VCPU_STAT("mmio_exits", mmio_exits), + VCPU_STAT("signal_exits", signal_exits), + VCPU_STAT("irq_window", irq_window_exits), + VCPU_STAT("nmi_window", nmi_window_exits), + VCPU_STAT("halt_exits", halt_exits), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + 
VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("hypercalls", hypercalls), + VCPU_STAT("request_irq", request_irq_exits), + VCPU_STAT("irq_exits", irq_exits), + VCPU_STAT("host_state_reload", host_state_reload), + VCPU_STAT("fpu_reload", fpu_reload), + VCPU_STAT("insn_emulation", insn_emulation), + VCPU_STAT("insn_emulation_fail", insn_emulation_fail), + VCPU_STAT("irq_injections", irq_injections), + VCPU_STAT("nmi_injections", nmi_injections), + VCPU_STAT("req_event", req_event), + VCPU_STAT("l1d_flush", l1d_flush), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped), + VM_STAT("mmu_pte_write", mmu_pte_write), + VM_STAT("mmu_pte_updated", mmu_pte_updated), + VM_STAT("mmu_pde_zapped", mmu_pde_zapped), + VM_STAT("mmu_flooded", mmu_flooded), + VM_STAT("mmu_recycled", mmu_recycled), + VM_STAT("mmu_cache_miss", mmu_cache_miss), + VM_STAT("mmu_unsync", mmu_unsync), + VM_STAT("remote_tlb_flush", remote_tlb_flush), + VM_STAT("largepages", lpages, .mode = 0444), + VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444), + VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions), { NULL } }; @@ -261,7 +260,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; - for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++) + for (i = 0; i < ASYNC_PF_PER_VCPU; i++) vcpu->arch.apf.gfns[i] = ~0; } @@ -612,15 +611,28 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) } EXPORT_SYMBOL_GPL(kvm_inject_page_fault); -static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) +bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault) { - if (mmu_is_nested(vcpu) && !fault->nested_page_fault) - vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); - else - vcpu->arch.mmu->inject_page_fault(vcpu, fault); + struct kvm_mmu *fault_mmu; + WARN_ON_ONCE(fault->vector != PF_VECTOR); + + fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu : + vcpu->arch.walk_mmu; + + /* + * Invalidate the TLB entry for the faulting address, if it exists, + * else the access will fault indefinitely (and to emulate hardware). 
+ */ + if ((fault->error_code & PFERR_PRESENT_MASK) && + !(fault->error_code & PFERR_RSVD_MASK)) + kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address, + fault_mmu->root_hpa); + fault_mmu->inject_page_fault(vcpu, fault); return fault->nested_page_fault; } +EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault); void kvm_inject_nmi(struct kvm_vcpu *vcpu) { @@ -1008,7 +1020,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { if (!skip_tlb_flush) { kvm_mmu_sync_roots(vcpu); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } return 0; } @@ -1020,7 +1032,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) return 1; - kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush); + kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush); vcpu->arch.cr3 = cr3; kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); @@ -1060,7 +1072,7 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu) } } -static void kvm_update_dr7(struct kvm_vcpu *vcpu) +void kvm_update_dr7(struct kvm_vcpu *vcpu) { unsigned long dr7; @@ -1073,6 +1085,7 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu) if (dr7 & DR7_BP_EN_MASK) vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; } +EXPORT_SYMBOL_GPL(kvm_update_dr7); static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) { @@ -1233,13 +1246,18 @@ static const u32 emulated_msrs_all[] = { HV_X64_MSR_VP_ASSIST_PAGE, HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL, HV_X64_MSR_TSC_EMULATION_STATUS, + HV_X64_MSR_SYNDBG_OPTIONS, + HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS, + HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER, + HV_X64_MSR_SYNDBG_PENDING_BUFFER, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, - MSR_KVM_PV_EOI_EN, + MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK, MSR_IA32_TSC_ADJUST, MSR_IA32_TSCDEADLINE, MSR_IA32_ARCH_CAPABILITIES, + MSR_IA32_PERF_CAPABILITIES, MSR_IA32_MISC_ENABLE, MSR_IA32_MCG_STATUS, MSR_IA32_MCG_CTL, @@ -1306,6 +1324,7 @@ static const u32 msr_based_features_all[] = { MSR_F10H_DECFG, MSR_IA32_UCODE_REV, MSR_IA32_ARCH_CAPABILITIES, + MSR_IA32_PERF_CAPABILITIES, }; static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)]; @@ -1564,6 +1583,13 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); +bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) +{ + return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || + need_resched() || signal_pending(current); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_exit_request); + /* * The fast path for frequent and performance sensitive wrmsr emulation, * i.e. 
the sending of IPI, sending IPI early in the VM-Exit flow reduces @@ -1592,27 +1618,44 @@ static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data return 1; } -enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) +static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data) +{ + if (!kvm_can_use_hv_timer(vcpu)) + return 1; + + kvm_set_lapic_tscdeadline_msr(vcpu, data); + return 0; +} + +fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) { u32 msr = kvm_rcx_read(vcpu); u64 data; - int ret = 0; + fastpath_t ret = EXIT_FASTPATH_NONE; switch (msr) { case APIC_BASE_MSR + (APIC_ICR >> 4): data = kvm_read_edx_eax(vcpu); - ret = handle_fastpath_set_x2apic_icr_irqoff(vcpu, data); + if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) { + kvm_skip_emulated_instruction(vcpu); + ret = EXIT_FASTPATH_EXIT_HANDLED; + } + break; + case MSR_IA32_TSCDEADLINE: + data = kvm_read_edx_eax(vcpu); + if (!handle_fastpath_set_tscdeadline(vcpu, data)) { + kvm_skip_emulated_instruction(vcpu); + ret = EXIT_FASTPATH_REENTER_GUEST; + } break; default: - return EXIT_FASTPATH_NONE; + break; } - if (!ret) { + if (ret != EXIT_FASTPATH_NONE) trace_kvm_msr_write(msr, data); - return EXIT_FASTPATH_SKIP_EMUL_INS; - } - return EXIT_FASTPATH_NONE; + return ret; } EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff); @@ -1901,7 +1944,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) { - u64 curr_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu); + u64 curr_offset = vcpu->arch.l1_tsc_offset; vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; } @@ -1943,14 +1986,13 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { - u64 tsc_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu); - - return tsc_offset + kvm_scale_tsc(vcpu, host_tsc); + return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc); } EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { + vcpu->arch.l1_tsc_offset = offset; vcpu->arch.tsc_offset = kvm_x86_ops.write_l1_tsc_offset(vcpu, offset); } @@ -2075,7 +2117,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc); static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) { - u64 tsc_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu); + u64 tsc_offset = vcpu->arch.l1_tsc_offset; kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment); } @@ -2637,29 +2679,54 @@ out: return r; } +static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu) +{ + u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT; + + return (vcpu->arch.apf.msr_en_val & mask) == mask; +} + static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) { gpa_t gpa = data & ~0x3f; - /* Bits 3:5 are reserved, Should be zero */ - if (data & 0x38) + /* Bits 4:5 are reserved, Should be zero */ + if (data & 0x30) return 1; - vcpu->arch.apf.msr_val = data; + vcpu->arch.apf.msr_en_val = data; - if (!(data & KVM_ASYNC_PF_ENABLED)) { + if (!kvm_pv_async_pf_enabled(vcpu)) { kvm_clear_async_pf_completion_queue(vcpu); kvm_async_pf_hash_reset(vcpu); return 0; } if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, - sizeof(u32))) + sizeof(u64))) return 1; vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; + 
kvm_async_pf_wakeup_all(vcpu); + + return 0; +} + +static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data) +{ + /* Bits 8-63 are reserved */ + if (data >> 8) + return 1; + + if (!lapic_in_kernel(vcpu)) + return 1; + + vcpu->arch.apf.msr_int_val = data; + + vcpu->arch.apf.vec = data & KVM_ASYNC_PF_VEC_MASK; + return 0; } @@ -2669,10 +2736,16 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu) vcpu->arch.time = 0; } -static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) +static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu) +{ + ++vcpu->stat.tlb_flush; + kvm_x86_ops.tlb_flush_all(vcpu); +} + +static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) { ++vcpu->stat.tlb_flush; - kvm_x86_ops.tlb_flush(vcpu, invalidate_gpa); + kvm_x86_ops.tlb_flush_guest(vcpu); } static void record_steal_time(struct kvm_vcpu *vcpu) @@ -2698,7 +2771,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) trace_kvm_pv_tlb_flush(vcpu->vcpu_id, st->preempted & KVM_VCPU_FLUSH_TLB); if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) - kvm_vcpu_flush_tlb(vcpu, false); + kvm_vcpu_flush_tlb_guest(vcpu); vcpu->arch.st.preempted = 0; @@ -2875,6 +2948,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (kvm_pv_enable_async_pf(vcpu, data)) return 1; break; + case MSR_KVM_ASYNC_PF_INT: + if (kvm_pv_enable_async_pf_int(vcpu, data)) + return 1; + break; + case MSR_KVM_ASYNC_PF_ACK: + if (data & 0x1) { + vcpu->arch.apf.pageready_pending = false; + kvm_check_async_pf_completion(vcpu); + } + break; case MSR_KVM_STEAL_TIME: if (unlikely(!sched_info_on())) @@ -2932,6 +3015,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) */ break; case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: + case HV_X64_MSR_SYNDBG_OPTIONS: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: @@ -3071,7 +3156,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) - return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); + return kvm_pmu_get_msr(vcpu, msr_info); msr_info->data = 0; break; case MSR_IA32_UCODE_REV: @@ -3149,7 +3234,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.time; break; case MSR_KVM_ASYNC_PF_EN: - msr_info->data = vcpu->arch.apf.msr_val; + msr_info->data = vcpu->arch.apf.msr_en_val; + break; + case MSR_KVM_ASYNC_PF_INT: + msr_info->data = vcpu->arch.apf.msr_int_val; + break; + case MSR_KVM_ASYNC_PF_ACK: + msr_info->data = 0; break; case MSR_KVM_STEAL_TIME: msr_info->data = vcpu->arch.st.msr_val; @@ -3187,6 +3278,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x20000000; break; case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: + case HV_X64_MSR_SYNDBG_OPTIONS: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_STIMER0_CONFIG ... 
HV_X64_MSR_STIMER3_COUNT: @@ -3233,7 +3326,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; default: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) - return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); + return kvm_pmu_get_msr(vcpu, msr_info); if (!ignore_msrs) { vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index); @@ -3363,6 +3456,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_X86_ROBUST_SINGLESTEP: case KVM_CAP_XSAVE: case KVM_CAP_ASYNC_PF: + case KVM_CAP_ASYNC_PF_INT: case KVM_CAP_GET_TSC_KHZ: case KVM_CAP_KVMCLOCK_CTRL: case KVM_CAP_READONLY_MEM: @@ -3431,14 +3525,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_X2APIC_API_VALID_FLAGS; break; case KVM_CAP_NESTED_STATE: - r = kvm_x86_ops.get_nested_state ? - kvm_x86_ops.get_nested_state(NULL, NULL, 0) : 0; + r = kvm_x86_ops.nested_ops->get_state ? + kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0; break; case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: r = kvm_x86_ops.enable_direct_tlbflush != NULL; break; case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: - r = kvm_x86_ops.nested_enable_evmcs != NULL; + r = kvm_x86_ops.nested_ops->enable_evmcs != NULL; break; default: break; @@ -4226,9 +4320,9 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return kvm_hv_activate_synic(vcpu, cap->cap == KVM_CAP_HYPERV_SYNIC2); case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: - if (!kvm_x86_ops.nested_enable_evmcs) + if (!kvm_x86_ops.nested_ops->enable_evmcs) return -ENOTTY; - r = kvm_x86_ops.nested_enable_evmcs(vcpu, &vmcs_version); + r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version); if (!r) { user_ptr = (void __user *)(uintptr_t)cap->args[0]; if (copy_to_user(user_ptr, &vmcs_version, @@ -4543,7 +4637,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, u32 user_data_size; r = -EINVAL; - if (!kvm_x86_ops.get_nested_state) + if (!kvm_x86_ops.nested_ops->get_state) break; BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size)); @@ -4551,8 +4645,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (get_user(user_data_size, &user_kvm_nested_state->size)) break; - r = kvm_x86_ops.get_nested_state(vcpu, user_kvm_nested_state, - user_data_size); + r = kvm_x86_ops.nested_ops->get_state(vcpu, user_kvm_nested_state, + user_data_size); if (r < 0) break; @@ -4573,7 +4667,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, int idx; r = -EINVAL; - if (!kvm_x86_ops.set_nested_state) + if (!kvm_x86_ops.nested_ops->set_state) break; r = -EFAULT; @@ -4586,7 +4680,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (kvm_state.flags & ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE - | KVM_STATE_NESTED_EVMCS)) + | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING + | KVM_STATE_NESTED_GIF_SET)) break; /* nested_run_pending implies guest_mode. 
*/ @@ -4595,7 +4690,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvm_x86_ops.set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); + r = kvm_x86_ops.nested_ops->set_state(vcpu, user_kvm_nested_state, &kvm_state); srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } @@ -5242,6 +5337,10 @@ static void kvm_init_msr_list(void) if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) continue; break; + case MSR_IA32_UMWAIT_CONTROL: + if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG)) + continue; + break; case MSR_IA32_RTIT_CTL: case MSR_IA32_RTIT_STATUS: if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT)) @@ -5259,7 +5358,7 @@ static void kvm_init_msr_list(void) !intel_pt_validate_hw_cap(PT_CAP_single_range_output))) continue; break; - case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: { + case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) || msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >= intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) @@ -5274,7 +5373,7 @@ static void kvm_init_msr_list(void) if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) continue; - } + break; default: break; } @@ -6403,7 +6502,7 @@ static bool inject_emulated_exception(struct kvm_vcpu *vcpu) { struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; if (ctxt->exception.vector == PF_VECTOR) - return kvm_propagate_fault(vcpu, &ctxt->exception); + return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception); if (ctxt->exception.error_code_valid) kvm_queue_exception_e(vcpu, ctxt->exception.vector, @@ -7669,14 +7768,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) kvm_x86_ops.update_cr8_intercept(vcpu, tpr, max_irr); } -static int inject_pending_event(struct kvm_vcpu *vcpu) +static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) { int r; + bool can_inject = true; /* try to reinject previous events if any */ - if (vcpu->arch.exception.injected) + if (vcpu->arch.exception.injected) { kvm_x86_ops.queue_exception(vcpu); + can_inject = false; + } /* * Do not inject an NMI or interrupt if there is a pending * exception. Exceptions and interrupts are recognized at @@ -7692,22 +7794,28 @@ static int inject_pending_event(struct kvm_vcpu *vcpu) * fully complete the previous instruction. */ else if (!vcpu->arch.exception.pending) { - if (vcpu->arch.nmi_injected) + if (vcpu->arch.nmi_injected) { kvm_x86_ops.set_nmi(vcpu); - else if (vcpu->arch.interrupt.injected) + can_inject = false; + } else if (vcpu->arch.interrupt.injected) { kvm_x86_ops.set_irq(vcpu); + can_inject = false; + } } + WARN_ON_ONCE(vcpu->arch.exception.injected && + vcpu->arch.exception.pending); + /* * Call check_nested_events() even if we reinjected a previous event * in order for caller to determine if it should require immediate-exit * from L2 to L1 due to pending L1 events which require exit * from L2 to L1. 
*/ - if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) { - r = kvm_x86_ops.check_nested_events(vcpu); - if (r != 0) - return r; + if (is_guest_mode(vcpu)) { + r = kvm_x86_ops.nested_ops->check_events(vcpu); + if (r < 0) + goto busy; } /* try to inject new event if pending */ @@ -7716,7 +7824,6 @@ static int inject_pending_event(struct kvm_vcpu *vcpu) vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code); - WARN_ON_ONCE(vcpu->arch.exception.injected); vcpu->arch.exception.pending = false; vcpu->arch.exception.injected = true; @@ -7725,16 +7832,6 @@ static int inject_pending_event(struct kvm_vcpu *vcpu) X86_EFLAGS_RF); if (vcpu->arch.exception.nr == DB_VECTOR) { - /* - * This code assumes that nSVM doesn't use - * check_nested_events(). If it does, the - * DR6/DR7 changes should happen before L1 - * gets a #VMEXIT for an intercepted #DB in - * L2. (Under VMX, on the other hand, the - * DR6/DR7 changes should not happen in the - * event of a VM-exit to L1 for an intercepted - * #DB in L2.) - */ kvm_deliver_exception_payload(vcpu); if (vcpu->arch.dr7 & DR7_GD) { vcpu->arch.dr7 &= ~DR7_GD; @@ -7743,42 +7840,72 @@ static int inject_pending_event(struct kvm_vcpu *vcpu) } kvm_x86_ops.queue_exception(vcpu); + can_inject = false; } - /* Don't consider new event if we re-injected an event */ - if (kvm_event_needs_reinjection(vcpu)) - return 0; + /* + * Finally, inject interrupt events. If an event cannot be injected + * due to architectural conditions (e.g. IF=0) a window-open exit + * will re-request KVM_REQ_EVENT. Sometimes however an event is pending + * and can architecturally be injected, but we cannot do it right now: + * an interrupt could have arrived just now and we have to inject it + * as a vmexit, or there could already an event in the queue, which is + * indicated by can_inject. In that case we request an immediate exit + * in order to make progress and get back here for another iteration. + * The kvm_x86_ops hooks communicate this by returning -EBUSY. + */ + if (vcpu->arch.smi_pending) { + r = can_inject ? kvm_x86_ops.smi_allowed(vcpu, true) : -EBUSY; + if (r < 0) + goto busy; + if (r) { + vcpu->arch.smi_pending = false; + ++vcpu->arch.smi_count; + enter_smm(vcpu); + can_inject = false; + } else + kvm_x86_ops.enable_smi_window(vcpu); + } - if (vcpu->arch.smi_pending && !is_smm(vcpu) && - kvm_x86_ops.smi_allowed(vcpu)) { - vcpu->arch.smi_pending = false; - ++vcpu->arch.smi_count; - enter_smm(vcpu); - } else if (vcpu->arch.nmi_pending && kvm_x86_ops.nmi_allowed(vcpu)) { - --vcpu->arch.nmi_pending; - vcpu->arch.nmi_injected = true; - kvm_x86_ops.set_nmi(vcpu); - } else if (kvm_cpu_has_injectable_intr(vcpu)) { - /* - * Because interrupts can be injected asynchronously, we are - * calling check_nested_events again here to avoid a race condition. - * See https://lkml.org/lkml/2014/7/2/60 for discussion about this - * proposal and current concerns. Perhaps we should be setting - * KVM_REQ_EVENT only on certain events and not unconditionally? - */ - if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) { - r = kvm_x86_ops.check_nested_events(vcpu); - if (r != 0) - return r; + if (vcpu->arch.nmi_pending) { + r = can_inject ? 
kvm_x86_ops.nmi_allowed(vcpu, true) : -EBUSY; + if (r < 0) + goto busy; + if (r) { + --vcpu->arch.nmi_pending; + vcpu->arch.nmi_injected = true; + kvm_x86_ops.set_nmi(vcpu); + can_inject = false; + WARN_ON(kvm_x86_ops.nmi_allowed(vcpu, true) < 0); } - if (kvm_x86_ops.interrupt_allowed(vcpu)) { - kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), - false); + if (vcpu->arch.nmi_pending) + kvm_x86_ops.enable_nmi_window(vcpu); + } + + if (kvm_cpu_has_injectable_intr(vcpu)) { + r = can_inject ? kvm_x86_ops.interrupt_allowed(vcpu, true) : -EBUSY; + if (r < 0) + goto busy; + if (r) { + kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false); kvm_x86_ops.set_irq(vcpu); + WARN_ON(kvm_x86_ops.interrupt_allowed(vcpu, true) < 0); } + if (kvm_cpu_has_injectable_intr(vcpu)) + kvm_x86_ops.enable_irq_window(vcpu); } - return 0; + if (is_guest_mode(vcpu) && + kvm_x86_ops.nested_ops->hv_timer_pending && + kvm_x86_ops.nested_ops->hv_timer_pending(vcpu)) + *req_immediate_exit = true; + + WARN_ON(vcpu->arch.exception.pending); + return; + +busy: + *req_immediate_exit = true; + return; } static void process_nmi(struct kvm_vcpu *vcpu) @@ -8169,24 +8296,13 @@ int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { - struct page *page = NULL; - if (!lapic_in_kernel(vcpu)) return; if (!kvm_x86_ops.set_apic_access_page_addr) return; - page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); - if (is_error_page(page)) - return; - kvm_x86_ops.set_apic_access_page_addr(vcpu, page_to_phys(page)); - - /* - * Do not pin apic access page in memory, the MMU notifier - * will call us again if it is migrated or swapped out. - */ - put_page(page); + kvm_x86_ops.set_apic_access_page_addr(vcpu); } void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) @@ -8206,13 +8322,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_int_win = dm_request_for_irq_injection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); - enum exit_fastpath_completion exit_fastpath = EXIT_FASTPATH_NONE; + fastpath_t exit_fastpath; bool req_immediate_exit = false; if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) { - if (unlikely(!kvm_x86_ops.get_vmcs12_pages(vcpu))) { + if (unlikely(!kvm_x86_ops.nested_ops->get_vmcs12_pages(vcpu))) { r = 0; goto out; } @@ -8234,8 +8350,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_mmu_sync_roots(vcpu); if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu)) kvm_mmu_load_pgd(vcpu); - if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) - kvm_vcpu_flush_tlb(vcpu, true); + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + kvm_vcpu_flush_tlb_all(vcpu); + + /* Flushing all ASIDs flushes the current ASID... 
*/ + kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + } + if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) + kvm_vcpu_flush_tlb_current(vcpu); + if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) + kvm_vcpu_flush_tlb_guest(vcpu); + if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; r = 0; @@ -8308,6 +8433,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_hv_process_stimers(vcpu); if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu)) kvm_vcpu_update_apicv(vcpu); + if (kvm_check_request(KVM_REQ_APF_READY, vcpu)) + kvm_check_async_pf_completion(vcpu); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { @@ -8318,32 +8445,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto out; } - if (inject_pending_event(vcpu) != 0) - req_immediate_exit = true; - else { - /* Enable SMI/NMI/IRQ window open exits if needed. - * - * SMIs have three cases: - * 1) They can be nested, and then there is nothing to - * do here because RSM will cause a vmexit anyway. - * 2) There is an ISA-specific reason why SMI cannot be - * injected, and the moment when this changes can be - * intercepted. - * 3) Or the SMI can be pending because - * inject_pending_event has completed the injection - * of an IRQ or NMI from the previous vmexit, and - * then we request an immediate exit to inject the - * SMI. - */ - if (vcpu->arch.smi_pending && !is_smm(vcpu)) - if (!kvm_x86_ops.enable_smi_window(vcpu)) - req_immediate_exit = true; - if (vcpu->arch.nmi_pending) - kvm_x86_ops.enable_nmi_window(vcpu); - if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) - kvm_x86_ops.enable_irq_window(vcpu); - WARN_ON(vcpu->arch.exception.pending); - } + inject_pending_event(vcpu, &req_immediate_exit); + if (req_int_win) + kvm_x86_ops.enable_irq_window(vcpu); if (kvm_lapic_enabled(vcpu)) { update_cr8_intercept(vcpu); @@ -8391,8 +8495,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active) kvm_x86_ops.sync_pir_to_irr(vcpu); - if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) - || need_resched() || signal_pending(current)) { + if (kvm_vcpu_exit_request(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); local_irq_enable(); @@ -8424,7 +8527,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } - kvm_x86_ops.run(vcpu); + exit_fastpath = kvm_x86_ops.run(vcpu); /* * Do this here before restoring debug registers on the host. 
@@ -8455,7 +8558,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
 
-	kvm_x86_ops.handle_exit_irqoff(vcpu, &exit_fastpath);
+	kvm_x86_ops.handle_exit_irqoff(vcpu);
 
 	/*
 	 * Consume any pending interrupts, including the possible source of
@@ -8502,6 +8605,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	return r;
 
 cancel_injection:
+	if (req_immediate_exit)
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
 	kvm_x86_ops.cancel_injection(vcpu);
 	if (unlikely(vcpu->arch.apic_attention))
 		kvm_lapic_sync_from_vapic(vcpu);
@@ -8544,8 +8649,8 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 
 static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events)
-		kvm_x86_ops.check_nested_events(vcpu);
+	if (is_guest_mode(vcpu))
+		kvm_x86_ops.nested_ops->check_events(vcpu);
 
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
 		!vcpu->arch.apf.halted);
@@ -8581,8 +8686,6 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 			break;
 		}
 
-		kvm_check_async_pf_completion(vcpu);
-
 		if (signal_pending(current)) {
 			r = -EINTR;
 			vcpu->run->exit_reason = KVM_EXIT_INTR;
@@ -8727,8 +8830,9 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	trace_kvm_fpu(0);
 }
 
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
+	struct kvm_run *kvm_run = vcpu->run;
 	int r;
 
 	vcpu_load(vcpu);
@@ -8746,18 +8850,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		r = -EAGAIN;
 		if (signal_pending(current)) {
 			r = -EINTR;
-			vcpu->run->exit_reason = KVM_EXIT_INTR;
+			kvm_run->exit_reason = KVM_EXIT_INTR;
 			++vcpu->stat.signal_exits;
 		}
 		goto out;
 	}
 
-	if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
+	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
 		r = -EINVAL;
 		goto out;
 	}
 
-	if (vcpu->run->kvm_dirty_regs) {
+	if (kvm_run->kvm_dirty_regs) {
 		r = sync_regs(vcpu);
 		if (r != 0)
 			goto out;
@@ -8787,7 +8891,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 out:
 	kvm_put_guest_fpu(vcpu);
-	if (vcpu->run->kvm_valid_regs)
+	if (kvm_run->kvm_valid_regs)
 		store_regs(vcpu);
 	post_kvm_run_save(vcpu);
 	kvm_sigset_deactivate(vcpu);
@@ -9379,9 +9483,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	}
 	fx_init(vcpu);
 
-	vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
-
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+	vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu);
 
 	vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
 
@@ -9502,7 +9605,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vcpu->arch.cr2 = 0;
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
-	vcpu->arch.apf.msr_val = 0;
+	vcpu->arch.apf.msr_en_val = 0;
+	vcpu->arch.apf.msr_int_val = 0;
 	vcpu->arch.st.msr_val = 0;
 
 	kvmclock_reset(vcpu);
@@ -10040,7 +10144,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 {
 	/* Still write protect RO slot */
 	if (new->flags & KVM_MEM_READONLY) {
-		kvm_mmu_slot_remove_write_access(kvm, new, PT_PAGE_TABLE_LEVEL);
+		kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
 		return;
 	}
 
@@ -10080,7 +10184,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 	} else {
 		int level =
 			kvm_dirty_log_manual_protect_and_init_set(kvm) ?
-			PT_DIRECTORY_LEVEL : PT_PAGE_TABLE_LEVEL;
+			PG_LEVEL_2M : PG_LEVEL_4K;
 
 		/*
 		 * If we're with initial-all-set, we don't need
@@ -10182,11 +10286,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 
 	if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
 	    (vcpu->arch.nmi_pending &&
-	     kvm_x86_ops.nmi_allowed(vcpu)))
+	     kvm_x86_ops.nmi_allowed(vcpu, false)))
 		return true;
 
 	if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
-	    (vcpu->arch.smi_pending && !is_smm(vcpu)))
+	    (vcpu->arch.smi_pending &&
+	     kvm_x86_ops.smi_allowed(vcpu, false)))
 		return true;
 
 	if (kvm_arch_interrupt_allowed(vcpu) &&
@@ -10197,6 +10302,11 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 	if (kvm_hv_has_stimer_pending(vcpu))
 		return true;
 
+	if (is_guest_mode(vcpu) &&
+	    kvm_x86_ops.nested_ops->hv_timer_pending &&
+	    kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
+		return true;
+
 	return false;
 }
 
@@ -10233,7 +10343,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-	return kvm_x86_ops.interrupt_allowed(vcpu);
+	return kvm_x86_ops.interrupt_allowed(vcpu, false);
 }
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
@@ -10298,12 +10408,14 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 
 static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
 {
+	BUILD_BUG_ON(!is_power_of_2(ASYNC_PF_PER_VCPU));
+
 	return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
 }
 
 static inline u32 kvm_async_pf_next_probe(u32 key)
 {
-	return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
+	return (key + 1) & (ASYNC_PF_PER_VCPU - 1);
 }
 
 static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
@@ -10321,7 +10433,7 @@ static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
 	int i;
 	u32 key = kvm_async_pf_hash_fn(gfn);
 
-	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
+	for (i = 0; i < ASYNC_PF_PER_VCPU &&
 		     (vcpu->arch.apf.gfns[key] != gfn &&
 		      vcpu->arch.apf.gfns[key] != ~0); i++)
 		key = kvm_async_pf_next_probe(key);
@@ -10339,6 +10451,10 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
 	u32 i, j, k;
 
 	i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
+
+	if (WARN_ON_ONCE(vcpu->arch.apf.gfns[i] != gfn))
+		return;
+
 	while (true) {
 		vcpu->arch.apf.gfns[i] = ~0;
 		do {
@@ -10357,18 +10473,32 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
 	}
 }
 
-static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
+static inline int apf_put_user_notpresent(struct kvm_vcpu *vcpu)
 {
+	u32 reason = KVM_PV_REASON_PAGE_NOT_PRESENT;
 
-	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
-				      sizeof(val));
+	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &reason,
+				      sizeof(reason));
 }
 
-static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
+static inline int apf_put_user_ready(struct kvm_vcpu *vcpu, u32 token)
 {
+	unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
 
-	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
-				     sizeof(u32));
+	return kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
+					     &token, offset, sizeof(token));
+}
+
+static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu)
+{
+	unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
+	u32 val;
+
+	if (kvm_read_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
+					 &val, offset, sizeof(val)))
+		return false;
+
+	return !val;
 }
 
@@ -10376,9 +10506,8 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
 	if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
 		return false;
 
-	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
-	    (vcpu->arch.apf.send_user_only &&
-	     kvm_x86_ops.get_cpl(vcpu) == 0))
+	if (!kvm_pv_async_pf_enabled(vcpu) ||
+	    (vcpu->arch.apf.send_user_only && kvm_x86_ops.get_cpl(vcpu) == 0))
 		return false;
 
 	return true;
@@ -10398,7 +10527,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
 	 * If interrupts are off we cannot even use an artificial
 	 * halt state.
 	 */
-	return kvm_x86_ops.interrupt_allowed(vcpu);
+	return kvm_arch_interrupt_allowed(vcpu);
 }
 
 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
@@ -10410,7 +10539,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
 	kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
 
 	if (kvm_can_deliver_async_pf(vcpu) &&
-	    !apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
+	    !apf_put_user_notpresent(vcpu)) {
 		fault.vector = PF_VECTOR;
 		fault.error_code_valid = true;
 		fault.error_code = 0;
@@ -10434,8 +10563,10 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 				 struct kvm_async_pf *work)
 {
-	struct x86_exception fault;
-	u32 val;
+	struct kvm_lapic_irq irq = {
+		.delivery_mode = APIC_DM_FIXED,
+		.vector = vcpu->arch.apf.vec
+	};
 
 	if (work->wakeup_all)
 		work->arch.token = ~0; /* broadcast wakeup */
@@ -10443,39 +10574,29 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 		kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
 	trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
 
-	if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
-	    !apf_get_user(vcpu, &val)) {
-		if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
-		    vcpu->arch.exception.pending &&
-		    vcpu->arch.exception.nr == PF_VECTOR &&
-		    !apf_put_user(vcpu, 0)) {
-			vcpu->arch.exception.injected = false;
-			vcpu->arch.exception.pending = false;
-			vcpu->arch.exception.nr = 0;
-			vcpu->arch.exception.has_error_code = false;
-			vcpu->arch.exception.error_code = 0;
-			vcpu->arch.exception.has_payload = false;
-			vcpu->arch.exception.payload = 0;
-		} else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
-			fault.vector = PF_VECTOR;
-			fault.error_code_valid = true;
-			fault.error_code = 0;
-			fault.nested_page_fault = false;
-			fault.address = work->arch.token;
-			fault.async_page_fault = true;
-			kvm_inject_page_fault(vcpu, &fault);
-		}
+	if (kvm_pv_async_pf_enabled(vcpu) &&
+	    !apf_put_user_ready(vcpu, work->arch.token)) {
+		vcpu->arch.apf.pageready_pending = true;
+		kvm_apic_set_irq(vcpu, &irq, NULL);
 	}
+
 	vcpu->arch.apf.halted = false;
 	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 }
 
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)
+{
+	kvm_make_request(KVM_REQ_APF_READY, vcpu);
+	if (!vcpu->arch.apf.pageready_pending)
+		kvm_vcpu_kick(vcpu);
+}
+
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
 {
-	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
+	if (!kvm_pv_async_pf_enabled(vcpu))
 		return true;
 	else
-		return kvm_can_do_async_pf(vcpu);
+		return apf_pageready_slot_free(vcpu);
 }
 
 void kvm_arch_start_assignment(struct kvm *kvm)
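With the rewrite above, "page ready" completions no longer borrow the #PF vector: the host writes the completion token into the slot the guest registered (kvm_vcpu_pv_apf_data.token), raises the guest-chosen interrupt vector, and holds back further notifications until the guest clears the slot. A simplified host-side sketch of that ordering (hypothetical helpers, not the KVM implementation):

/* Sketch only: publish-then-notify ordering for a "page ready" event. */
#include <stdbool.h>
#include <stdint.h>

struct apf_shared_slot {
	uint32_t token;		/* 0 means "free"; the guest clears it when done */
};

static bool notify_page_ready(struct apf_shared_slot *slot, uint32_t token,
			      void (*send_irq)(unsigned int vector),
			      unsigned int vector)
{
	if (slot->token != 0)
		return false;	/* previous notification not consumed yet */

	slot->token = token;	/* publish the token first ... */
	send_irq(vector);	/* ... then tell the guest to look at it */
	return true;
}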
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index b968acc0516f..6eb62e97e59f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -125,6 +125,12 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
 	return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
 }
 
+static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
+{
+	++vcpu->stat.tlb_flush;
+	kvm_x86_ops.tlb_flush_current(vcpu);
+}
+
 static inline int is_pae(struct kvm_vcpu *vcpu)
 {
 	return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
@@ -268,7 +274,7 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
 bool kvm_vector_hashing_enabled(void);
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 			    int emulation_type, void *insn, int insn_len);
-enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
+fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
 
 extern u64 host_xcr0;
 extern u64 supported_xcr0;
@@ -358,5 +364,6 @@ static inline bool kvm_dr7_valid(u64 data)
 void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
 void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
 u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu);
+bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu);
 
 #endif
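kvm_vcpu_exit_request(), declared here and used in vcpu_enter_guest() above, centralizes the bail-out test that used to be open-coded just before entering the guest. Based on the removed condition, it amounts to something like the following (sketch with stand-in fields, not the kernel's exact code):

/* Sketch only: the four conditions the removed open-coded check combined. */
#include <stdbool.h>

struct entry_checks {
	bool exiting_guest_mode;	/* vcpu->mode == EXITING_GUEST_MODE */
	bool request_pending;		/* kvm_request_pending(vcpu)        */
	bool need_resched;		/* need_resched()                   */
	bool signal_pending;		/* signal_pending(current)          */
};

static bool should_abort_guest_entry(const struct entry_checks *c)
{
	return c->exiting_guest_mode || c->request_pending ||
	       c->need_resched || c->signal_pending;
}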
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index c126571e5e2e..65d15df6212d 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -27,9 +27,20 @@
 # include <asm/smp.h>
 #endif
 
+static void delay_loop(u64 __loops);
+
+/*
+ * Calibration and selection of the delay mechanism happens only once
+ * during boot.
+ */
+static void (*delay_fn)(u64) __ro_after_init = delay_loop;
+static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;
+
 /* simple loop based delay: */
-static void delay_loop(unsigned long loops)
+static void delay_loop(u64 __loops)
 {
+	unsigned long loops = (unsigned long)__loops;
+
 	asm volatile(
 		"	test %0,%0	\n"
 		"	jz 3f		\n"
@@ -49,9 +60,9 @@ static void delay_loop(unsigned long loops)
 }
 
 /* TSC based delay: */
-static void delay_tsc(unsigned long __loops)
+static void delay_tsc(u64 cycles)
 {
-	u64 bclock, now, loops = __loops;
+	u64 bclock, now;
 	int cpu;
 
 	preempt_disable();
@@ -59,7 +70,7 @@ static void delay_tsc(unsigned long __loops)
 	bclock = rdtsc_ordered();
 	for (;;) {
 		now = rdtsc_ordered();
-		if ((now - bclock) >= loops)
+		if ((now - bclock) >= cycles)
 			break;
 
 		/* Allow RT tasks to run */
@@ -77,7 +88,7 @@ static void delay_tsc(unsigned long __loops)
 		 * counter for this CPU.
 		 */
 		if (unlikely(cpu != smp_processor_id())) {
-			loops -= (now - bclock);
+			cycles -= (now - bclock);
 			cpu = smp_processor_id();
 			bclock = rdtsc_ordered();
 		}
@@ -86,65 +97,96 @@ static void delay_tsc(unsigned long __loops)
 }
 
 /*
+ * On Intel the TPAUSE instruction waits until any of:
+ * 1) the TSC counter exceeds the value provided in EDX:EAX
+ * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
+ * 3) an external interrupt occurs
+ */
+static void delay_halt_tpause(u64 start, u64 cycles)
+{
+	u64 until = start + cycles;
+	u32 eax, edx;
+
+	eax = lower_32_bits(until);
+	edx = upper_32_bits(until);
+
+	/*
+	 * Hard code the deeper (C0.2) sleep state because exit latency is
+	 * small compared to the "microseconds" that usleep() will delay.
+	 */
+	__tpause(TPAUSE_C02_STATE, edx, eax);
+}
+
+/*
  * On some AMD platforms, MWAITX has a configurable 32-bit timer, that
- * counts with TSC frequency. The input value is the loop of the
- * counter, it will exit when the timer expires.
+ * counts with TSC frequency. The input value is the number of TSC cycles
+ * to wait. MWAITX will also exit when the timer expires.
  */
-static void delay_mwaitx(unsigned long __loops)
+static void delay_halt_mwaitx(u64 unused, u64 cycles)
 {
-	u64 start, end, delay, loops = __loops;
+	u64 delay;
+
+	delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
+	/*
+	 * Use cpu_tss_rw as a cacheline-aligned, seldomly accessed per-cpu
+	 * variable as the monitor target.
+	 */
+	__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
+
+	/*
+	 * AMD, like Intel, supports the EAX hint and EAX=0xf means, do not
+	 * enter any deep C-state and we use it here in delay() to minimize
+	 * wakeup latency.
+	 */
+	__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
+}
+
+/*
+ * Call a vendor specific function to delay for a given amount of time. Because
+ * these functions may return earlier than requested, check for actual elapsed
+ * time and call again until done.
+ */
+static void delay_halt(u64 __cycles)
+{
+	u64 start, end, cycles = __cycles;
 
 	/*
 	 * Timer value of 0 causes MWAITX to wait indefinitely, unless there
 	 * is a store on the memory monitored by MONITORX.
 	 */
-	if (loops == 0)
+	if (!cycles)
 		return;
 
 	start = rdtsc_ordered();
 	for (;;) {
-		delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
-
-		/*
-		 * Use cpu_tss_rw as a cacheline-aligned, seldomly
-		 * accessed per-cpu variable as the monitor target.
-		 */
-		__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
-
-		/*
-		 * AMD, like Intel's MWAIT version, supports the EAX hint and
-		 * EAX=0xf0 means, do not enter any deep C-state and we use it
-		 * here in delay() to minimize wakeup latency.
-		 */
-		__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
-
+		delay_halt_fn(start, cycles);
 		end = rdtsc_ordered();
 
-		if (loops <= end - start)
+		if (cycles <= end - start)
 			break;
 
-		loops -= end - start;
-
+		cycles -= end - start;
 		start = end;
 	}
 }
 
-/*
- * Since we calibrate only once at boot, this
- * function should be set once at boot and not changed
- */
-static void (*delay_fn)(unsigned long) = delay_loop;
-
-void use_tsc_delay(void)
+void __init use_tsc_delay(void)
 {
 	if (delay_fn == delay_loop)
 		delay_fn = delay_tsc;
 }
 
+void __init use_tpause_delay(void)
+{
+	delay_halt_fn = delay_halt_tpause;
+	delay_fn = delay_halt;
+}
+
 void use_mwaitx_delay(void)
 {
-	delay_fn = delay_mwaitx;
+	delay_halt_fn = delay_halt_mwaitx;
+	delay_fn = delay_halt;
 }
 
 int read_current_timer(unsigned long *timer_val)
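The delay_halt() loop above exists because both TPAUSE and MWAITX may return before the deadline (an interrupt fires, or the 32-bit MWAITX timer caps the wait), so the elapsed cycles are measured and the remaining budget re-armed until it is spent. A standalone sketch of the same loop shape (user-space C on x86, using __rdtsc() as the cycle counter; not kernel code):

/* Sketch only: wait/measure/re-arm around a primitive that can return early. */
#include <stdint.h>
#include <x86intrin.h>		/* __rdtsc() */

typedef uint64_t u64;

static void wait_for(u64 cycles, void (*wait)(u64 start, u64 cycles))
{
	u64 start, end;

	if (!cycles)
		return;

	start = __rdtsc();
	for (;;) {
		wait(start, cycles);	/* may wake up before the deadline */
		end = __rdtsc();
		if (cycles <= end - start)
			break;
		cycles -= end - start;	/* shrink the budget ... */
		start = end;		/* ... and re-arm from "now" */
	}
}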
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index dffe8e4d3140..c5437f2964ee 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -30,6 +30,7 @@
 #include <asm/desc.h>			/* store_idt(), ...		*/
 #include <asm/cpu_entry_area.h>		/* exception stack		*/
 #include <asm/pgtable_areas.h>		/* VMALLOC_START, ...		*/
+#include <asm/kvm_para.h>		/* kvm_handle_async_pf		*/
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
@@ -1359,6 +1360,24 @@ do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
 		unsigned long address)
 {
 	prefetchw(&current->mm->mmap_sem);
+	/*
+	 * KVM has two types of events that are, logically, interrupts, but
+	 * are unfortunately delivered using the #PF vector. These events are
+	 * "you just accessed valid memory, but the host doesn't have it right
+	 * now, so I'll put you to sleep if you continue" and "that memory
+	 * you tried to access earlier is available now."
+	 *
+	 * We are relying on the interrupted context being sane (valid RSP,
+	 * relevant locks not held, etc.), which is fine as long as the
+	 * interrupted context had IF=1. We are also relying on the KVM
+	 * async pf type field and CR2 being read consistently instead of
+	 * getting values from real and async page faults mixed up.
+	 *
+	 * Fingers crossed.
+	 */
+	if (kvm_handle_async_pf(regs, (u32)address))
+		return;
+
 	trace_page_fault_entries(regs, hw_error_code, address);
 
 	if (unlikely(kmmio_fault(regs, address)))
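The call added above runs before any regular #PF processing, so a paravirtual async-PF event is consumed early and never reaches the ordinary page-fault path. The overall dispatch shape, reduced to a sketch (stub names only; kvm_handle_async_pf()'s real logic lives in the guest's KVM paravirt code):

/* Sketch only: multiplexing paravirt "async PF" events onto the fault vector. */
#include <stdbool.h>

struct fault_regs;	/* stand-in for struct pt_regs */

/* Stub: returns true when the fault was an async-PF event and was handled. */
static bool async_pf_event(struct fault_regs *regs, unsigned int token)
{
	(void)regs;
	(void)token;
	return false;
}

static void page_fault_entry(struct fault_regs *regs, unsigned long address)
{
	/* Async-PF events first: logically interrupts, not real faults. */
	if (async_pf_event(regs, (unsigned int)address))
		return;

	/* ... ordinary page-fault handling would continue here ... */
}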
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 66cd150b7e54..96fde03aa987 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1475,8 +1475,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		const s32 imm32 = insn->imm;
 		const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
-		const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true;
-		const bool sstk = insn->src_reg == BPF_REG_AX ? false : true;
+		const bool dstk = insn->dst_reg != BPF_REG_AX;
+		const bool sstk = insn->src_reg != BPF_REG_AX;
 		const u8 code = insn->code;
 		const u8 *dst = bpf2ia32[insn->dst_reg];
 		const u8 *src = bpf2ia32[insn->src_reg];
diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h
index 47b7702aaa40..e57f0d0a88d8 100644
--- a/arch/xtensa/include/asm/uaccess.h
+++ b/arch/xtensa/include/asm/uaccess.h
@@ -84,7 +84,7 @@ extern long __put_user_bad(void);
 #define __put_user_check(x, ptr, size)					\
 ({									\
 	long __pu_err = -EFAULT;					\
-	__typeof__(*(ptr)) *__pu_addr = (ptr);				\
+	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
 	if (access_ok(__pu_addr, size))					\
 		__put_user_size((x), __pu_addr, (size), __pu_err);	\
 	__pu_err;							\
@@ -180,11 +180,11 @@ __asm__ __volatile__(					\
 
 #define __get_user_check(x, ptr, size)					\
 ({									\
 	long __gu_err = -EFAULT;					\
-	const __typeof__(*(ptr)) *__gu_addr = (ptr);			\
+	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
 	if (access_ok(__gu_addr, size))					\
 		__get_user_size((x), __gu_addr, (size), __gu_err);	\
 	else								\
-		(x) = 0;						\
+		(x) = (__typeof__(*(ptr)))0;				\
 	__gu_err;							\
 })
 
@@ -202,13 +202,15 @@ do {									\
 		u64 __x;						\
 		if (unlikely(__copy_from_user(&__x, ptr, 8))) {		\
 			retval = -EFAULT;				\
-			(x) = 0;					\
+			(x) = (__typeof__(*(ptr)))0;			\
 		} else {						\
-			(x) = *(__force __typeof__((ptr)))&__x;		\
+			(x) = *(__force __typeof__(*(ptr)) *)&__x;	\
 		}							\
 		break;							\
 	}								\
-	default: (x) = 0; __get_user_bad();				\
+	default:							\
+		(x) = (__typeof__(*(ptr)))0;				\
+		__get_user_bad();					\
 	}								\
 } while (0)
 
@@ -270,15 +272,15 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
  */
 
 static inline unsigned long
-__xtensa_clear_user(void *addr, unsigned long size)
+__xtensa_clear_user(void __user *addr, unsigned long size)
 {
-	if (!__memset(addr, 0, size))
+	if (!__memset((void __force *)addr, 0, size))
 		return size;
 	return 0;
 }
 
 static inline unsigned long
-clear_user(void *addr, unsigned long size)
+clear_user(void __user *addr, unsigned long size)
 {
 	if (access_ok(addr, size))
 		return __xtensa_clear_user(addr, size);
@@ -290,10 +292,10 @@ clear_user(void *addr, unsigned long size)
 
 #ifndef CONFIG_GENERIC_STRNCPY_FROM_USER
 
-extern long __strncpy_user(char *, const char *, long);
+extern long __strncpy_user(char *dst, const char __user *src, long count);
 
 static inline long
-strncpy_from_user(char *dst, const char *src, long count)
+strncpy_from_user(char *dst, const char __user *src, long count)
 {
 	if (access_ok(src, 1))
 		return __strncpy_user(dst, src, count);
@@ -306,13 +308,11 @@ long strncpy_from_user(char *dst, const char *src, long count);
 /*
  * Return the size of a string (including the ending 0!)
  */
-extern long __strnlen_user(const char *, long);
+extern long __strnlen_user(const char __user *str, long len);
 
-static inline long strnlen_user(const char *str, long len)
+static inline long strnlen_user(const char __user *str, long len)
 {
-	unsigned long top = __kernel_ok ? ~0UL : TASK_SIZE - 1;
-
-	if ((unsigned long)str > top)
+	if (!access_ok(str, 1))
 		return 0;
 	return __strnlen_user(str, len);
 }
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 06fbb0a171f1..fae33ddcaebb 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -959,14 +959,14 @@ ENDPROC(unrecoverable_exception)
  * of the proper size instead.
  *
  * This algorithm simply backs out the register changes started by the user
- * excpetion handler, makes it appear that we have started a window underflow
+ * exception handler, makes it appear that we have started a window underflow
  * by rotating the window back and then setting the old window base (OWB) in
  * the 'ps' register with the rolled back window base. The 'movsp' instruction
  * will be re-executed and this time since the next window frames is in the
  * active AR registers it won't cause an exception.
  *
  * If the WindowUnderflow code gets a TLB miss the page will get mapped
- * the the partial windeowUnderflow will be handeled in the double exception
+ * the partial WindowUnderflow will be handled in the double exception
 * handler.
 *
 * Entry condition: