Diffstat (limited to 'arch/powerpc')
177 files changed, 2552 insertions, 963 deletions
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index ccd2556bdb53..c7628e973084 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -141,7 +141,9 @@ AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
 endif
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mno-pointers-to-nested-functions)
+
 CFLAGS-$(CONFIG_PPC32)	:= -ffixed-r2 $(MULTIPLEWORD)
+CFLAGS-$(CONFIG_PPC32)	+= $(call cc-option,-mno-readonly-in-sdata)
 
 ifeq ($(CONFIG_PPC_BOOK3S_64),y)
 CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,-mtune=power4)
diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts
index 86266159521e..deb52e41ab84 100644
--- a/arch/powerpc/boot/dts/acadia.dts
+++ b/arch/powerpc/boot/dts/acadia.dts
@@ -219,6 +219,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/adder875-redboot.dts b/arch/powerpc/boot/dts/adder875-redboot.dts
index 083984720b2f..7f5ff4168482 100644
--- a/arch/powerpc/boot/dts/adder875-redboot.dts
+++ b/arch/powerpc/boot/dts/adder875-redboot.dts
@@ -178,6 +178,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = &console;
+		stdout-path = &console;
 	};
 };
diff --git a/arch/powerpc/boot/dts/adder875-uboot.dts b/arch/powerpc/boot/dts/adder875-uboot.dts
index e4554caf8f8d..bd9f33c57737 100644
--- a/arch/powerpc/boot/dts/adder875-uboot.dts
+++ b/arch/powerpc/boot/dts/adder875-uboot.dts
@@ -177,6 +177,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = &console;
+		stdout-path = &console;
 	};
 };
diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts
index 746779202a12..8a7a10139bc9 100644
--- a/arch/powerpc/boot/dts/akebono.dts
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -410,6 +410,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = &UART0;
+		stdout-path = &UART0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/amigaone.dts b/arch/powerpc/boot/dts/amigaone.dts
index 49ac36b16dd7..712430155b99 100644
--- a/arch/powerpc/boot/dts/amigaone.dts
+++ b/arch/powerpc/boot/dts/amigaone.dts
@@ -168,6 +168,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/pci@80000000/isa@7/serial@3f8";
+		stdout-path = "/pci@80000000/isa@7/serial@3f8";
 	};
 };
diff --git a/arch/powerpc/boot/dts/asp834x-redboot.dts b/arch/powerpc/boot/dts/asp834x-redboot.dts
index 9198745f45fb..e987b5af9326 100644
--- a/arch/powerpc/boot/dts/asp834x-redboot.dts
+++ b/arch/powerpc/boot/dts/asp834x-redboot.dts
@@ -304,7 +304,7 @@
 
 	chosen {
 		bootargs = "console=ttyS0,38400 root=/dev/mtdblock3 rootfstype=jffs2";
-		linux,stdout-path = &serial0;
+		stdout-path = &serial0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/bamboo.dts b/arch/powerpc/boot/dts/bamboo.dts
index aa68911f6560..538e42b1120d 100644
--- a/arch/powerpc/boot/dts/bamboo.dts
+++ b/arch/powerpc/boot/dts/bamboo.dts
@@ -295,6 +295,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/c2k.dts b/arch/powerpc/boot/dts/c2k.dts
index 27f169e3ade9..c5beb72d18b7 100644
--- a/arch/powerpc/boot/dts/c2k.dts
+++ b/arch/powerpc/boot/dts/c2k.dts
@@ -361,6 +361,6 @@
 		};
 	};
 	chosen {
-		linux,stdout-path = &MPSC0;
+		stdout-path = &MPSC0;
 	};
 };
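A note on the device-tree hunks in this series: they replace the deprecated "linux,stdout-path" property in /chosen with the generic "stdout-path" from the current DT bindings. A minimal sketch of how a consumer could honour both spellings is shown below; it is illustrative only and not part of this patch set — the fallback to "linux,stdout-path" and the helper name get_stdout_path() are assumptions for the sake of the example, though of_find_node_by_path()/of_property_read_string() are the real of_* APIs.

#include <linux/of.h>

/* Sketch: prefer the generic property, fall back to the deprecated one. */
static const char *get_stdout_path(void)
{
	struct device_node *chosen = of_find_node_by_path("/chosen");
	const char *path = NULL;

	if (!chosen)
		return NULL;
	/* Generic property, preferred by current bindings */
	if (of_property_read_string(chosen, "stdout-path", &path))
		/* Deprecated spelling kept for older device trees */
		of_property_read_string(chosen, "linux,stdout-path", &path);
	of_node_put(chosen);
	return path;
}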
diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts
index f2ad5815f08d..a04a4fcfde63 100644
--- a/arch/powerpc/boot/dts/currituck.dts
+++ b/arch/powerpc/boot/dts/currituck.dts
@@ -237,6 +237,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = &UART0;
+		stdout-path = &UART0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts
index c280e75c86bf..c3922fc03e0b 100644
--- a/arch/powerpc/boot/dts/digsy_mtc.dts
+++ b/arch/powerpc/boot/dts/digsy_mtc.dts
@@ -78,7 +78,7 @@
 		};
 
 		rtc@56 {
-			compatible = "mc,rv3029c2";
+			compatible = "microcrystal,rv3029";
 			reg = <0x56>;
 		};
diff --git a/arch/powerpc/boot/dts/ebony.dts b/arch/powerpc/boot/dts/ebony.dts
index ec2d142291b4..5d11e6ea7405 100644
--- a/arch/powerpc/boot/dts/ebony.dts
+++ b/arch/powerpc/boot/dts/ebony.dts
@@ -332,6 +332,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@40000200";
+		stdout-path = "/plb/opb/serial@40000200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/eiger.dts b/arch/powerpc/boot/dts/eiger.dts
index 48bcf7187924..7a1231d9d6f0 100644
--- a/arch/powerpc/boot/dts/eiger.dts
+++ b/arch/powerpc/boot/dts/eiger.dts
@@ -421,7 +421,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600200";
+		stdout-path = "/plb/opb/serial@ef600200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/ep405.dts b/arch/powerpc/boot/dts/ep405.dts
index 53ef06cc2134..4ac9c5ab6e6b 100644
--- a/arch/powerpc/boot/dts/ep405.dts
+++ b/arch/powerpc/boot/dts/ep405.dts
@@ -225,6 +225,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/fsl/mvme7100.dts b/arch/powerpc/boot/dts/fsl/mvme7100.dts
index e2d306ad37a6..721cb53758ae 100644
--- a/arch/powerpc/boot/dts/fsl/mvme7100.dts
+++ b/arch/powerpc/boot/dts/fsl/mvme7100.dts
@@ -146,7 +146,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = &serial0;
+		stdout-path = &serial0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts
index 6560283c5aec..9311b86b1bd9 100644
--- a/arch/powerpc/boot/dts/fsp2.dts
+++ b/arch/powerpc/boot/dts/fsp2.dts
@@ -607,7 +607,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@b0020000";
+		stdout-path = "/plb/opb/serial@b0020000";
 		bootargs = "console=ttyS0,115200 rw log_buf_len=32768 debug";
 	};
 };
diff --git a/arch/powerpc/boot/dts/holly.dts b/arch/powerpc/boot/dts/holly.dts
index 43e6f0c8e449..02bd304c7d38 100644
--- a/arch/powerpc/boot/dts/holly.dts
+++ b/arch/powerpc/boot/dts/holly.dts
@@ -191,6 +191,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/tsi109@c0000000/serial@7808";
+		stdout-path = "/tsi109@c0000000/serial@7808";
 	};
 };
diff --git a/arch/powerpc/boot/dts/hotfoot.dts b/arch/powerpc/boot/dts/hotfoot.dts
index 71d3bb4931dc..b93bf2d9dd5b 100644
--- a/arch/powerpc/boot/dts/hotfoot.dts
+++ b/arch/powerpc/boot/dts/hotfoot.dts
@@ -291,6 +291,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = &UART0;
+		stdout-path = &UART0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/icon.dts b/arch/powerpc/boot/dts/icon.dts
index 9c94fd737f7c..2e6e3a7b2604 100644
--- a/arch/powerpc/boot/dts/icon.dts
+++ b/arch/powerpc/boot/dts/icon.dts
@@ -442,6 +442,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@f0000200";
+		stdout-path = "/plb/opb/serial@f0000200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/iss4xx-mpic.dts b/arch/powerpc/boot/dts/iss4xx-mpic.dts
index 23e9d9b7e400..f7063198b2dc 100644
--- a/arch/powerpc/boot/dts/iss4xx-mpic.dts
+++ b/arch/powerpc/boot/dts/iss4xx-mpic.dts
@@ -150,6 +150,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@40000200";
+		stdout-path = "/plb/opb/serial@40000200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/iss4xx.dts b/arch/powerpc/boot/dts/iss4xx.dts
index 4ff6555c866d..5533aff25e41 100644
--- a/arch/powerpc/boot/dts/iss4xx.dts
+++ b/arch/powerpc/boot/dts/iss4xx.dts
@@ -111,6 +111,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@40000200";
+		stdout-path = "/plb/opb/serial@40000200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts
index f913dbe25d35..02629e119b87 100644
--- a/arch/powerpc/boot/dts/katmai.dts
+++ b/arch/powerpc/boot/dts/katmai.dts
@@ -505,6 +505,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@f0000200";
+		stdout-path = "/plb/opb/serial@f0000200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/klondike.dts b/arch/powerpc/boot/dts/klondike.dts
index 8c9429033618..d9613b7b945f 100644
--- a/arch/powerpc/boot/dts/klondike.dts
+++ b/arch/powerpc/boot/dts/klondike.dts
@@ -222,6 +222,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@50001000";
+		stdout-path = "/plb/opb/serial@50001000";
 	};
 };
diff --git a/arch/powerpc/boot/dts/ksi8560.dts b/arch/powerpc/boot/dts/ksi8560.dts
index 5d68236e7c3c..fe6c17c8812a 100644
--- a/arch/powerpc/boot/dts/ksi8560.dts
+++ b/arch/powerpc/boot/dts/ksi8560.dts
@@ -339,6 +339,6 @@
 
 	chosen {
-		linux,stdout-path = "/soc/cpm/serial@91a00";
+		stdout-path = "/soc/cpm/serial@91a00";
 	};
 };
diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts
index b5413cb85f13..843f156a49c4 100644
--- a/arch/powerpc/boot/dts/media5200.dts
+++ b/arch/powerpc/boot/dts/media5200.dts
@@ -25,7 +25,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = &console;
+		stdout-path = &console;
 	};
 
 	cpus {
diff --git a/arch/powerpc/boot/dts/mpc8272ads.dts b/arch/powerpc/boot/dts/mpc8272ads.dts
index 6d2cddf64cfd..98282c18d989 100644
--- a/arch/powerpc/boot/dts/mpc8272ads.dts
+++ b/arch/powerpc/boot/dts/mpc8272ads.dts
@@ -262,6 +262,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/soc/cpm/serial@11a00";
+		stdout-path = "/soc/cpm/serial@11a00";
 	};
 };
diff --git a/arch/powerpc/boot/dts/mpc866ads.dts b/arch/powerpc/boot/dts/mpc866ads.dts
index 34c1f48b1a09..4443fac3f576 100644
--- a/arch/powerpc/boot/dts/mpc866ads.dts
+++ b/arch/powerpc/boot/dts/mpc866ads.dts
@@ -185,6 +185,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/soc/cpm/serial@a80";
+		stdout-path = "/soc/cpm/serial@a80";
 	};
 };
diff --git a/arch/powerpc/boot/dts/mpc885ads.dts b/arch/powerpc/boot/dts/mpc885ads.dts
index 4e93bd961e0f..5b037f51741d 100644
--- a/arch/powerpc/boot/dts/mpc885ads.dts
+++ b/arch/powerpc/boot/dts/mpc885ads.dts
@@ -227,6 +227,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/soc/cpm/serial@a80";
+		stdout-path = "/soc/cpm/serial@a80";
 	};
 };
diff --git a/arch/powerpc/boot/dts/mvme5100.dts b/arch/powerpc/boot/dts/mvme5100.dts
index 1ecb341a232a..a7eb6d25903d 100644
--- a/arch/powerpc/boot/dts/mvme5100.dts
+++ b/arch/powerpc/boot/dts/mvme5100.dts
@@ -179,7 +179,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = &serial0;
+		stdout-path = &serial0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/obs600.dts b/arch/powerpc/boot/dts/obs600.dts
index 18e7d79ee4c3..d10b0411809b 100644
--- a/arch/powerpc/boot/dts/obs600.dts
+++ b/arch/powerpc/boot/dts/obs600.dts
@@ -309,6 +309,6 @@
 		};
 	};
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600200";
+		stdout-path = "/plb/opb/serial@ef600200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/pq2fads.dts b/arch/powerpc/boot/dts/pq2fads.dts
index 0c525ff0c257..a477615e3468 100644
--- a/arch/powerpc/boot/dts/pq2fads.dts
+++ b/arch/powerpc/boot/dts/pq2fads.dts
@@ -242,6 +242,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/soc/cpm/serial@11a00";
+		stdout-path = "/soc/cpm/serial@11a00";
 	};
 };
diff --git a/arch/powerpc/boot/dts/rainier.dts b/arch/powerpc/boot/dts/rainier.dts
index 9684c80e4093..e59829cff556 100644
--- a/arch/powerpc/boot/dts/rainier.dts
+++ b/arch/powerpc/boot/dts/rainier.dts
@@ -344,7 +344,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 		bootargs = "console=ttyS0,115200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts
index d86a3a498118..f3e046fb49e2 100644
--- a/arch/powerpc/boot/dts/redwood.dts
+++ b/arch/powerpc/boot/dts/redwood.dts
@@ -381,7 +381,7 @@
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600200";
+		stdout-path = "/plb/opb/serial@ef600200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/sam440ep.dts b/arch/powerpc/boot/dts/sam440ep.dts
index 088361cf4636..7d15f18e1180 100644
--- a/arch/powerpc/boot/dts/sam440ep.dts
+++ b/arch/powerpc/boot/dts/sam440ep.dts
@@ -288,6 +288,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/sequoia.dts b/arch/powerpc/boot/dts/sequoia.dts
index e41b88a5eaee..60d211da9593 100644
--- a/arch/powerpc/boot/dts/sequoia.dts
+++ b/arch/powerpc/boot/dts/sequoia.dts
@@ -406,7 +406,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 		bootargs = "console=ttyS0,115200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/storcenter.dts b/arch/powerpc/boot/dts/storcenter.dts
index 2a555738517e..99f6f544dc5f 100644
--- a/arch/powerpc/boot/dts/storcenter.dts
+++ b/arch/powerpc/boot/dts/storcenter.dts
@@ -137,6 +137,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = &serial0;
+		stdout-path = &serial0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/taishan.dts b/arch/powerpc/boot/dts/taishan.dts
index 1657ad0bf8a6..803f1bff7fa8 100644
--- a/arch/powerpc/boot/dts/taishan.dts
+++ b/arch/powerpc/boot/dts/taishan.dts
@@ -422,6 +422,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@40000300";
+		stdout-path = "/plb/opb/serial@40000300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/virtex440-ml507.dts b/arch/powerpc/boot/dts/virtex440-ml507.dts
index 391a4e299783..66f1c6312de6 100644
--- a/arch/powerpc/boot/dts/virtex440-ml507.dts
+++ b/arch/powerpc/boot/dts/virtex440-ml507.dts
@@ -32,7 +32,7 @@
 	} ;
 	chosen {
 		bootargs = "console=ttyS0 root=/dev/ram";
-		linux,stdout-path = &RS232_Uart_1;
+		stdout-path = &RS232_Uart_1;
 	} ;
 	cpus {
 		#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/virtex440-ml510.dts b/arch/powerpc/boot/dts/virtex440-ml510.dts
index 81201d3907e2..3b736ca26ddc 100644
--- a/arch/powerpc/boot/dts/virtex440-ml510.dts
+++ b/arch/powerpc/boot/dts/virtex440-ml510.dts
@@ -26,7 +26,7 @@
 	} ;
 	chosen {
 		bootargs = "console=ttyS0 root=/dev/ram";
-		linux,stdout-path = "/plb@0/serial@83e00000";
+		stdout-path = "/plb@0/serial@83e00000";
 	} ;
 	cpus {
 		#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/walnut.dts b/arch/powerpc/boot/dts/walnut.dts
index 4a9f726ada13..0872862c9363 100644
--- a/arch/powerpc/boot/dts/walnut.dts
+++ b/arch/powerpc/boot/dts/walnut.dts
@@ -241,6 +241,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts
index ea9053ef4819..b4f32740870e 100644
--- a/arch/powerpc/boot/dts/warp.dts
+++ b/arch/powerpc/boot/dts/warp.dts
@@ -304,6 +304,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/xpedite5200_xmon.dts b/arch/powerpc/boot/dts/xpedite5200_xmon.dts
index 646acfbef0dd..d5e14421c39a 100644
--- a/arch/powerpc/boot/dts/xpedite5200_xmon.dts
+++ b/arch/powerpc/boot/dts/xpedite5200_xmon.dts
@@ -503,6 +503,6 @@
 
 	/* Needed for dtbImage boot wrapper compatibility */
 	chosen {
-		linux,stdout-path = &serial0;
+		stdout-path = &serial0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/yosemite.dts b/arch/powerpc/boot/dts/yosemite.dts
index 30bb4753577a..56508785ce13 100644
--- a/arch/powerpc/boot/dts/yosemite.dts
+++ b/arch/powerpc/boot/dts/yosemite.dts
@@ -327,6 +327,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 7330150bfe34..d9713ad62e3c 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -62,6 +62,7 @@ void RunModeException(struct pt_regs *regs);
 void single_step_exception(struct pt_regs *regs);
 void program_check_exception(struct pt_regs *regs);
 void alignment_exception(struct pt_regs *regs);
+void slb_miss_bad_addr(struct pt_regs *regs);
 void StackOverflow(struct pt_regs *regs);
 void nonrecoverable_exception(struct pt_regs *regs);
 void kernel_fp_unavailable_exception(struct pt_regs *regs);
@@ -88,7 +89,18 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
 long sys_swapcontext(struct ucontext __user *old_ctx,
 		    struct ucontext __user *new_ctx,
 		    int ctx_size, int r6, int r7, int r8, struct pt_regs *regs);
+int sys_debug_setcontext(struct ucontext __user *ctx,
+			 int ndbg, struct sig_dbg_op __user *dbg,
+			 int r6, int r7, int r8,
+			 struct pt_regs *regs);
+int
+ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp);
+unsigned long __init early_init(unsigned long dt_ptr);
+void __init machine_init(u64 dt_ptr);
 #endif
+
+long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
+		      u32 len_high, u32 len_low);
 long sys_switch_endian(void);
 notrace unsigned int __check_irq_replay(void);
 void notrace restore_interrupts(void);
@@ -126,4 +138,7 @@ extern int __ucmpdi2(u64, u64);
 void _mcount(void);
 unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
 
+void pnv_power9_force_smt4_catch(void);
+void pnv_power9_force_smt4_release(void);
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 37671feb2bf6..c8c836e8ad1b 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -80,6 +80,16 @@ struct spinlock;
 /* Maximum possible number of NPUs in a system. */
 #define NV_MAX_NPUS 8
 
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * upto 4G range. That gets us 16 low slices. For the rest we track slices
+ * in 1TB size.
+ */
+struct slice_mask {
+	u64 low_slices;
+	DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
 typedef struct {
 	mm_context_id_t id;
 	u16 user_psize;		/* page size index */
@@ -94,9 +104,18 @@ typedef struct {
 	struct npu_context *npu_context;
 
 #ifdef CONFIG_PPC_MM_SLICES
-	u64 low_slices_psize;	/* SLB page size encodings */
+	 /* SLB page size encodings*/
+	unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
 	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
 	unsigned long slb_addr_limit;
+# ifdef CONFIG_PPC_64K_PAGES
+	struct slice_mask mask_64k;
+# endif
+	struct slice_mask mask_4k;
+# ifdef CONFIG_HUGETLB_PAGE
+	struct slice_mask mask_16m;
+	struct slice_mask mask_16g;
+# endif
 #else
 	u16 sllp;		/* SLB page size encoding */
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
new file mode 100644
index 000000000000..db0dedab65ee
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
+#define _ASM_POWERPC_BOOK3S_64_SLICE_H
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#define SLICE_LOW_SHIFT		28
+#define SLICE_LOW_TOP		(0x100000000ul)
+#define SLICE_NUM_LOW		(SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
+
+#define SLICE_HIGH_SHIFT	40
+#define SLICE_NUM_HIGH		(H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
+#define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
+
+#else /* CONFIG_PPC_MM_SLICES */
+
+#define get_slice_psize(mm, addr)	((mm)->context.user_psize)
+#define slice_set_user_psize(mm, psize)		\
+do {						\
+	(mm)->context.user_psize = (psize);	\
+	(mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
+} while (0)
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index b77f0364df94..11843e37d9cf 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -99,7 +99,6 @@ static inline void invalidate_dcache_range(unsigned long start,
 #ifdef CONFIG_PPC64
 extern void flush_dcache_range(unsigned long start, unsigned long stop);
 extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_dcache_phys_range(unsigned long start, unsigned long stop);
 #endif
 
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
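On the slice headers above: user address space is carved into 256MB "low" slices below 4GB (shift 28, 16 slices) and 1TB "high" slices beyond (shift 40), and slice_mask tracks one bit per slice. A standalone sketch of that index math, using the values from the new book3s/64/slice.h, is below; it is illustrative only (H_PGTABLE_RANGE and the surrounding kernel context are not needed for the arithmetic).

#include <stdio.h>

#define SLICE_LOW_SHIFT		28	/* 256MB low slices */
#define SLICE_HIGH_SHIFT	40	/* 1TB high slices */

int main(void)
{
	unsigned long addr = 0x80000000UL;	/* 2GB -> low slice 8 */
	printf("low slice:  %lu\n", addr >> SLICE_LOW_SHIFT);

	addr = 0x30000000000UL;			/* 3TB -> high slice 3 */
	printf("high slice: %lu\n", addr >> SLICE_HIGH_SHIFT);
	return 0;
}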
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 2e2bacbdf6ed..66eba1e0007b 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -131,41 +131,48 @@ static inline void cpu_feature_keys_init(void) { }
 
 /* CPU kernel features */
 
-/* Retain the 32b definitions all use bottom half of word */
+/* Definitions for features that we have on both 32-bit and 64-bit chips */
 #define CPU_FTR_COHERENT_ICACHE		ASM_CONST(0x00000001)
-#define CPU_FTR_L2CR			ASM_CONST(0x00000002)
-#define CPU_FTR_SPEC7450		ASM_CONST(0x00000004)
-#define CPU_FTR_ALTIVEC			ASM_CONST(0x00000008)
-#define CPU_FTR_TAU			ASM_CONST(0x00000010)
-#define CPU_FTR_CAN_DOZE		ASM_CONST(0x00000020)
-#define CPU_FTR_USE_TB			ASM_CONST(0x00000040)
-#define CPU_FTR_L2CSR			ASM_CONST(0x00000080)
-#define CPU_FTR_601			ASM_CONST(0x00000100)
-#define CPU_FTR_DBELL			ASM_CONST(0x00000200)
-#define CPU_FTR_CAN_NAP			ASM_CONST(0x00000400)
-#define CPU_FTR_L3CR			ASM_CONST(0x00000800)
-#define CPU_FTR_L3_DISABLE_NAP		ASM_CONST(0x00001000)
-#define CPU_FTR_NAP_DISABLE_L2_PR	ASM_CONST(0x00002000)
-#define CPU_FTR_DUAL_PLL_750FX		ASM_CONST(0x00004000)
-#define CPU_FTR_NO_DPM			ASM_CONST(0x00008000)
-#define CPU_FTR_476_DD2			ASM_CONST(0x00010000)
-#define CPU_FTR_NEED_COHERENT		ASM_CONST(0x00020000)
-#define CPU_FTR_NO_BTIC			ASM_CONST(0x00040000)
-#define CPU_FTR_DEBUG_LVL_EXC		ASM_CONST(0x00080000)
-#define CPU_FTR_NODSISRALIGN		ASM_CONST(0x00100000)
-#define CPU_FTR_PPC_LE			ASM_CONST(0x00200000)
-#define CPU_FTR_REAL_LE			ASM_CONST(0x00400000)
-#define CPU_FTR_FPU_UNAVAILABLE		ASM_CONST(0x00800000)
-#define CPU_FTR_UNIFIED_ID_CACHE	ASM_CONST(0x01000000)
-#define CPU_FTR_SPE			ASM_CONST(0x02000000)
-#define CPU_FTR_NEED_PAIRED_STWCX	ASM_CONST(0x04000000)
-#define CPU_FTR_LWSYNC			ASM_CONST(0x08000000)
-#define CPU_FTR_NOEXECUTE		ASM_CONST(0x10000000)
-#define CPU_FTR_INDEXED_DCR		ASM_CONST(0x20000000)
-#define CPU_FTR_EMB_HV			ASM_CONST(0x40000000)
+#define CPU_FTR_ALTIVEC			ASM_CONST(0x00000002)
+#define CPU_FTR_DBELL			ASM_CONST(0x00000004)
+#define CPU_FTR_CAN_NAP			ASM_CONST(0x00000008)
+#define CPU_FTR_DEBUG_LVL_EXC		ASM_CONST(0x00000010)
+#define CPU_FTR_NODSISRALIGN		ASM_CONST(0x00000020)
+#define CPU_FTR_FPU_UNAVAILABLE		ASM_CONST(0x00000040)
+#define CPU_FTR_LWSYNC			ASM_CONST(0x00000080)
+#define CPU_FTR_NOEXECUTE		ASM_CONST(0x00000100)
+#define CPU_FTR_EMB_HV			ASM_CONST(0x00000200)
+
+/* Definitions for features that only exist on 32-bit chips */
+#ifdef CONFIG_PPC32
+#define CPU_FTR_601			ASM_CONST(0x00001000)
+#define CPU_FTR_L2CR			ASM_CONST(0x00002000)
+#define CPU_FTR_SPEC7450		ASM_CONST(0x00004000)
+#define CPU_FTR_TAU			ASM_CONST(0x00008000)
+#define CPU_FTR_CAN_DOZE		ASM_CONST(0x00010000)
+#define CPU_FTR_USE_RTC			ASM_CONST(0x00020000)
+#define CPU_FTR_L3CR			ASM_CONST(0x00040000)
+#define CPU_FTR_L3_DISABLE_NAP		ASM_CONST(0x00080000)
+#define CPU_FTR_NAP_DISABLE_L2_PR	ASM_CONST(0x00100000)
+#define CPU_FTR_DUAL_PLL_750FX		ASM_CONST(0x00200000)
+#define CPU_FTR_NO_DPM			ASM_CONST(0x00400000)
+#define CPU_FTR_476_DD2			ASM_CONST(0x00800000)
+#define CPU_FTR_NEED_COHERENT		ASM_CONST(0x01000000)
+#define CPU_FTR_NO_BTIC			ASM_CONST(0x02000000)
+#define CPU_FTR_PPC_LE			ASM_CONST(0x04000000)
+#define CPU_FTR_UNIFIED_ID_CACHE	ASM_CONST(0x08000000)
+#define CPU_FTR_SPE			ASM_CONST(0x10000000)
+#define CPU_FTR_NEED_PAIRED_STWCX	ASM_CONST(0x20000000)
+#define CPU_FTR_INDEXED_DCR		ASM_CONST(0x40000000)
+
+#else	/* CONFIG_PPC32 */
+/* Define these to 0 for the sake of tests in common code */
+#define CPU_FTR_601			(0)
+#define CPU_FTR_PPC_LE			(0)
+#endif
 
 /*
- * Add the 64-bit processor unique features in the top half of the word;
+ * Definitions for the 64-bit processor unique features;
  * on 32-bit, make the names available but defined to be 0.
 */
#ifdef __powerpc64__
@@ -174,38 +181,41 @@
 #define LONG_ASM_CONST(x)	0
 #endif
 
-#define CPU_FTR_HVMODE			LONG_ASM_CONST(0x0000000100000000)
-#define CPU_FTR_ARCH_201		LONG_ASM_CONST(0x0000000200000000)
-#define CPU_FTR_ARCH_206		LONG_ASM_CONST(0x0000000400000000)
-#define CPU_FTR_ARCH_207S		LONG_ASM_CONST(0x0000000800000000)
-#define CPU_FTR_ARCH_300		LONG_ASM_CONST(0x0000001000000000)
-#define CPU_FTR_MMCRA			LONG_ASM_CONST(0x0000002000000000)
-#define CPU_FTR_CTRL			LONG_ASM_CONST(0x0000004000000000)
-#define CPU_FTR_SMT			LONG_ASM_CONST(0x0000008000000000)
-#define CPU_FTR_PAUSE_ZERO		LONG_ASM_CONST(0x0000010000000000)
-#define CPU_FTR_PURR			LONG_ASM_CONST(0x0000020000000000)
-#define CPU_FTR_CELL_TB_BUG		LONG_ASM_CONST(0x0000040000000000)
-#define CPU_FTR_SPURR			LONG_ASM_CONST(0x0000080000000000)
-#define CPU_FTR_DSCR			LONG_ASM_CONST(0x0000100000000000)
-#define CPU_FTR_VSX			LONG_ASM_CONST(0x0000200000000000)
-#define CPU_FTR_SAO			LONG_ASM_CONST(0x0000400000000000)
-#define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0000800000000000)
-#define CPU_FTR_UNALIGNED_LD_STD	LONG_ASM_CONST(0x0001000000000000)
-#define CPU_FTR_ASYM_SMT		LONG_ASM_CONST(0x0002000000000000)
-#define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0004000000000000)
-#define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0008000000000000)
-#define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0010000000000000)
-#define CPU_FTR_PKEY			LONG_ASM_CONST(0x0020000000000000)
-#define CPU_FTR_VMX_COPY		LONG_ASM_CONST(0x0040000000000000)
-#define CPU_FTR_TM			LONG_ASM_CONST(0x0080000000000000)
-#define CPU_FTR_CFAR			LONG_ASM_CONST(0x0100000000000000)
-#define CPU_FTR_HAS_PPR			LONG_ASM_CONST(0x0200000000000000)
-#define CPU_FTR_DAWR			LONG_ASM_CONST(0x0400000000000000)
-#define CPU_FTR_DABRX			LONG_ASM_CONST(0x0800000000000000)
-#define CPU_FTR_PMAO_BUG		LONG_ASM_CONST(0x1000000000000000)
-#define CPU_FTR_P9_TLBIE_BUG		LONG_ASM_CONST(0x2000000000000000)
-#define CPU_FTR_POWER9_DD1		LONG_ASM_CONST(0x4000000000000000)
-#define CPU_FTR_POWER9_DD2_1		LONG_ASM_CONST(0x8000000000000000)
+#define CPU_FTR_REAL_LE			LONG_ASM_CONST(0x0000000000001000)
+#define CPU_FTR_HVMODE			LONG_ASM_CONST(0x0000000000002000)
+#define CPU_FTR_ARCH_201		LONG_ASM_CONST(0x0000000000004000)
+#define CPU_FTR_ARCH_206		LONG_ASM_CONST(0x0000000000008000)
+#define CPU_FTR_ARCH_207S		LONG_ASM_CONST(0x0000000000010000)
+#define CPU_FTR_ARCH_300		LONG_ASM_CONST(0x0000000000020000)
+#define CPU_FTR_MMCRA			LONG_ASM_CONST(0x0000000000040000)
+#define CPU_FTR_CTRL			LONG_ASM_CONST(0x0000000000080000)
+#define CPU_FTR_SMT			LONG_ASM_CONST(0x0000000000100000)
+#define CPU_FTR_PAUSE_ZERO		LONG_ASM_CONST(0x0000000000200000)
+#define CPU_FTR_PURR			LONG_ASM_CONST(0x0000000000400000)
+#define CPU_FTR_CELL_TB_BUG		LONG_ASM_CONST(0x0000000000800000)
+#define CPU_FTR_SPURR			LONG_ASM_CONST(0x0000000001000000)
+#define CPU_FTR_DSCR			LONG_ASM_CONST(0x0000000002000000)
+#define CPU_FTR_VSX			LONG_ASM_CONST(0x0000000004000000)
+#define CPU_FTR_SAO			LONG_ASM_CONST(0x0000000008000000)
+#define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0000000010000000)
+#define CPU_FTR_UNALIGNED_LD_STD	LONG_ASM_CONST(0x0000000020000000)
+#define CPU_FTR_ASYM_SMT		LONG_ASM_CONST(0x0000000040000000)
+#define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0000000080000000)
+#define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0000000100000000)
+#define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0000000200000000)
+#define CPU_FTR_PKEY			LONG_ASM_CONST(0x0000000400000000)
+#define CPU_FTR_VMX_COPY		LONG_ASM_CONST(0x0000000800000000)
+#define CPU_FTR_TM			LONG_ASM_CONST(0x0000001000000000)
+#define CPU_FTR_CFAR			LONG_ASM_CONST(0x0000002000000000)
+#define CPU_FTR_HAS_PPR			LONG_ASM_CONST(0x0000004000000000)
+#define CPU_FTR_DAWR			LONG_ASM_CONST(0x0000008000000000)
+#define CPU_FTR_DABRX			LONG_ASM_CONST(0x0000010000000000)
+#define CPU_FTR_PMAO_BUG		LONG_ASM_CONST(0x0000020000000000)
+#define CPU_FTR_POWER9_DD1		LONG_ASM_CONST(0x0000040000000000)
+#define CPU_FTR_POWER9_DD2_1		LONG_ASM_CONST(0x0000080000000000)
+#define CPU_FTR_P9_TM_HV_ASSIST		LONG_ASM_CONST(0x0000100000000000)
+#define CPU_FTR_P9_TM_XER_SO_BUG	LONG_ASM_CONST(0x0000200000000000)
+#define CPU_FTR_P9_TLBIE_BUG		LONG_ASM_CONST(0x0000400000000000)
 
 #ifndef __ASSEMBLY__
@@ -286,21 +296,19 @@ static inline void cpu_feature_keys_init(void) { }
 #endif
 
 #define CPU_FTRS_PPC601	(CPU_FTR_COMMON | CPU_FTR_601 | \
-	CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
-#define CPU_FTRS_603	(CPU_FTR_COMMON | \
-	CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+	CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_USE_RTC)
+#define CPU_FTRS_603	(CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
 	CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
-#define CPU_FTRS_604	(CPU_FTR_COMMON | \
-	CPU_FTR_USE_TB | CPU_FTR_PPC_LE)
+#define CPU_FTRS_604	(CPU_FTR_COMMON | CPU_FTR_PPC_LE)
 #define CPU_FTRS_740_NOTAU	(CPU_FTR_COMMON | \
-	CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+	CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
 	CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
 #define CPU_FTRS_740	(CPU_FTR_COMMON | \
-	CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+	CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
 	CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
 	CPU_FTR_PPC_LE)
 #define CPU_FTRS_750	(CPU_FTR_COMMON | \
-	CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+	CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
 	CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
 	CPU_FTR_PPC_LE)
 #define CPU_FTRS_750CL	(CPU_FTRS_750)
@@ -309,125 +317,118 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTRS_750FX	(CPU_FTRS_750 | CPU_FTR_DUAL_PLL_750FX)
 #define CPU_FTRS_750GX	(CPU_FTRS_750FX)
 #define CPU_FTRS_7400_NOTAU	(CPU_FTR_COMMON | \
-	CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+	CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
 	CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7400	(CPU_FTR_COMMON | \
-	CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+	CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
 	CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7450_20	(CPU_FTR_COMMON | \
-	CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+	CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
 	CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7450_21	(CPU_FTR_COMMON | \
-	CPU_FTR_USE_TB | \
 	CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
 	CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
 	CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7450_23	(CPU_FTR_COMMON | \
-	CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+	CPU_FTR_NEED_PAIRED_STWCX | \
 	CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
 	CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7455_1	(CPU_FTR_COMMON | \
-	CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+	CPU_FTR_NEED_PAIRED_STWCX | \
 	CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \
 	CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7455_20	(CPU_FTR_COMMON | \
-	CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+	CPU_FTR_NEED_PAIRED_STWCX | \
 	CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
 	CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
 	CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7455	(CPU_FTR_COMMON | \
-	CPU_FTR_USE_TB | \
 	CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
 	CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7447_10	(CPU_FTR_COMMON | \
-	CPU_FTR_USE_TB | \
 	CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
 	CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE | \
 	CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7447	(CPU_FTR_COMMON | \
-	CPU_FTR_USE_TB | \
 	CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
 	CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7447A	(CPU_FTR_COMMON | \
-	CPU_FTR_USE_TB | \
 	CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
 	CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7448	(CPU_FTR_COMMON | \
-	CPU_FTR_USE_TB | \
 	CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
 	CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
-#define CPU_FTRS_82XX	(CPU_FTR_COMMON | \
-	CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB)
+#define CPU_FTRS_82XX	(CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE)
 #define CPU_FTRS_G2_LE	(CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
-	CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP)
+	CPU_FTR_MAYBE_CAN_NAP)
 #define CPU_FTRS_E300	(CPU_FTR_MAYBE_CAN_DOZE | \
-	CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
+	CPU_FTR_MAYBE_CAN_NAP | \
 	CPU_FTR_COMMON)
 #define CPU_FTRS_E300C2	(CPU_FTR_MAYBE_CAN_DOZE | \
-	CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
+	CPU_FTR_MAYBE_CAN_NAP | \
 	CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
-#define CPU_FTRS_CLASSIC32	(CPU_FTR_COMMON | CPU_FTR_USE_TB)
-#define CPU_FTRS_8XX	(CPU_FTR_USE_TB | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_40X	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_44X	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_440x6	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_CLASSIC32	(CPU_FTR_COMMON)
+#define CPU_FTRS_8XX	(CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_40X	(CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_44X	(CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_440x6	(CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
 	CPU_FTR_INDEXED_DCR)
 #define CPU_FTRS_47X	(CPU_FTRS_440x6)
-#define CPU_FTRS_E200	(CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \
+#define CPU_FTRS_E200	(CPU_FTR_SPE_COMP | \
 	CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
 	CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
 	CPU_FTR_DEBUG_LVL_EXC)
-#define CPU_FTRS_E500	(CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+#define CPU_FTRS_E500	(CPU_FTR_MAYBE_CAN_DOZE | \
 	CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
 	CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500_2	(CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+#define CPU_FTRS_E500_2	(CPU_FTR_MAYBE_CAN_DOZE | \
 	CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
 	CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500MC	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
-	CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E500MC	(CPU_FTR_NODSISRALIGN | \
+	CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
 	CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
 /*
  * e5500/e6500 erratum A-006958 is a timebase bug that can use the
  * same workaround as CPU_FTR_CELL_TB_BUG.
  */
-#define CPU_FTRS_E5500	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
-	CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E5500	(CPU_FTR_NODSISRALIGN | \
+	CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
 	CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_CELL_TB_BUG)
-#define CPU_FTRS_E6500	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
-	CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E6500	(CPU_FTR_NODSISRALIGN | \
+	CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
 	CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT)
 #define CPU_FTRS_GENERIC_32	(CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
 
 /* 64-bit CPUs */
-#define CPU_FTRS_POWER4	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER4	(CPU_FTR_LWSYNC | \
 	CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \
 	CPU_FTR_STCX_CHECKS_ADDRESS)
-#define CPU_FTRS_PPC970	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_PPC970	(CPU_FTR_LWSYNC | \
 	CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_201 | \
 	CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \
 	CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \
 	CPU_FTR_HVMODE | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER5	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER5	(CPU_FTR_LWSYNC | \
 	CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
 	CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER6 (CPU_FTR_LWSYNC | \
 	CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	CPU_FTR_COHERENT_ICACHE | \
@@ -435,7 +436,7 @@ static inline void cpu_feature_keys_init(void) { }
 	CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
 	CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR | \
 	CPU_FTR_DABRX)
-#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER7 (CPU_FTR_LWSYNC | \
 	CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
 	CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	CPU_FTR_COHERENT_ICACHE | \
@@ -444,7 +445,7 @@ static inline void cpu_feature_keys_init(void) { }
 	CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	CPU_FTR_CFAR | CPU_FTR_HVMODE | \
 	CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY)
-#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \
 	CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
 	CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	CPU_FTR_COHERENT_ICACHE | \
@@ -456,7 +457,7 @@ static inline void cpu_feature_keys_init(void) { }
 	CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY)
 #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
 #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
-#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \
 	CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
 	CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	CPU_FTR_COHERENT_ICACHE | \
@@ -464,22 +465,24 @@ static inline void cpu_feature_keys_init(void) { }
 	CPU_FTR_DSCR | CPU_FTR_SAO | \
 	CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
-	CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
-	CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \
-	CPU_FTR_PKEY | CPU_FTR_P9_TLBIE_BUG)
+	CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+	CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
+	CPU_FTR_P9_TLBIE_BUG)
 #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
 			     (~CPU_FTR_SAO))
 #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
 #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
-#define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_P9_TM_HV_ASSIST | \
+			       CPU_FTR_P9_TM_XER_SO_BUG)
+#define CPU_FTRS_CELL	(CPU_FTR_LWSYNC | \
 	CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	CPU_FTR_PAUSE_ZERO | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
 	CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_DABRX)
-#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_PA6T (CPU_FTR_LWSYNC | \
 	CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
 	CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX)
-#define CPU_FTRS_COMPATIBLE	(CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
+#define CPU_FTRS_COMPATIBLE	(CPU_FTR_PPCAS_ARCH_V2)
 
 #ifdef __powerpc64__
 #ifdef CONFIG_PPC_BOOK3E
@@ -490,7 +493,8 @@ static inline void cpu_feature_keys_init(void) { }
 	    CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
 	    CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
 	    CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | \
-	    CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1)
+	    CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
+	    CPU_FTRS_POWER9_DD2_2)
 #endif
 #else
 enum {
diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index fc97404de0a3..ce5da214ffe5 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -47,6 +47,7 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 
 void set_breakpoint(struct arch_hw_breakpoint *brk);
 void __set_breakpoint(struct arch_hw_breakpoint *brk);
+bool ppc_breakpoint_available(void);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 extern void do_send_trap(struct pt_regs *regs, unsigned long address,
 			 unsigned long error_code, int brkpt);
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index fd37cc101f4f..c2266ca61853 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -256,6 +256,12 @@ static inline void eeh_serialize_unlock(unsigned long flags)
 	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
 }
 
+static inline bool eeh_state_active(int state)
+{
+	return (state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
+	== (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+}
+
 typedef void *(*eeh_traverse_func)(void *data, void *flag);
 void eeh_set_pe_aux_size(int size);
 int eeh_phb_pe_create(struct pci_controller *phb);
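The eeh_state_active() helper added above reports a PE as active only when both the MMIO and DMA state bits are set. A minimal standalone sketch of that logic is below; the EEH_STATE_* flag values used here are illustrative assumptions, not the kernel's actual definitions.

#include <stdbool.h>
#include <stdio.h>

#define EEH_STATE_MMIO_ACTIVE	0x04	/* assumed value for illustration */
#define EEH_STATE_DMA_ACTIVE	0x08	/* assumed value for illustration */

/* True only when both bits are present in 'state'. */
static bool eeh_state_active(int state)
{
	return (state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
		== (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
}

int main(void)
{
	printf("%d\n", eeh_state_active(EEH_STATE_MMIO_ACTIVE));	/* 0 */
	printf("%d\n", eeh_state_active(EEH_STATE_MMIO_ACTIVE |
					EEH_STATE_DMA_ACTIVE));		/* 1 */
	return 0;
}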
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index 1e551a2d6f82..9884e872686f 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -34,7 +34,8 @@ struct eeh_event {
 int eeh_event_init(void);
 int eeh_send_failure_event(struct eeh_pe *pe);
 void eeh_remove_event(struct eeh_pe *pe, bool force);
-void eeh_handle_event(struct eeh_pe *pe);
+void eeh_handle_normal_event(struct eeh_pe *pe);
+void eeh_handle_special_event(void);
 
 #endif /* __KERNEL__ */
 #endif /* ASM_POWERPC_EEH_EVENT_H */
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index 90863245df53..d3a7e36f1402 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -466,17 +466,17 @@ static inline unsigned long epapr_hypercall(unsigned long *in,
 			unsigned long *out,
 			unsigned long nr)
 {
-	unsigned long register r0 asm("r0");
-	unsigned long register r3 asm("r3") = in[0];
-	unsigned long register r4 asm("r4") = in[1];
-	unsigned long register r5 asm("r5") = in[2];
-	unsigned long register r6 asm("r6") = in[3];
-	unsigned long register r7 asm("r7") = in[4];
-	unsigned long register r8 asm("r8") = in[5];
-	unsigned long register r9 asm("r9") = in[6];
-	unsigned long register r10 asm("r10") = in[7];
-	unsigned long register r11 asm("r11") = nr;
-	unsigned long register r12 asm("r12");
+	register unsigned long r0 asm("r0");
+	register unsigned long r3 asm("r3") = in[0];
+	register unsigned long r4 asm("r4") = in[1];
+	register unsigned long r5 asm("r5") = in[2];
+	register unsigned long r6 asm("r6") = in[3];
+	register unsigned long r7 asm("r7") = in[4];
+	register unsigned long r8 asm("r8") = in[5];
+	register unsigned long r9 asm("r9") = in[6];
+	register unsigned long r10 asm("r10") = in[7];
+	register unsigned long r11 asm("r11") = nr;
+	register unsigned long r12 asm("r12");
 
 	asm volatile("bl	epapr_hypercall_start"
 		     : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 1a4847f67ea8..48f2ed2a71ae 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -89,17 +89,17 @@ pte_t *huge_pte_offset_and_shift(struct mm_struct *mm,
 
 void flush_dcache_icache_hugepage(struct page *page);
 
-#if defined(CONFIG_PPC_MM_SLICES)
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 			   unsigned long len);
-#else
+
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,
 					 unsigned long len)
 {
+	if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled())
+		return slice_is_hugepage_only_range(mm, addr, len);
 	return 0;
 }
-#endif
 
 void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
 			    pte_t pte);
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index eca3f9c68907..2e2dddab5d65 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -88,6 +88,7 @@
 #define H_P8		-61
 #define H_P9		-62
 #define H_TOO_BIG	-64
+#define H_UNSUPPORTED	-67
 #define H_OVERLAP	-68
 #define H_INTERRUPT	-69
 #define H_BAD_DATA	-70
@@ -337,6 +338,9 @@
 #define H_CPU_CHAR_L1D_FLUSH_ORI30	(1ull << 61) // IBM bit 2
 #define H_CPU_CHAR_L1D_FLUSH_TRIG2	(1ull << 60) // IBM bit 3
 #define H_CPU_CHAR_L1D_THREAD_PRIV	(1ull << 59) // IBM bit 4
+#define H_CPU_CHAR_BRANCH_HINTS_HONORED	(1ull << 58) // IBM bit 5
+#define H_CPU_CHAR_THREAD_RECONFIG_CTRL	(1ull << 57) // IBM bit 6
+#define H_CPU_CHAR_COUNT_CACHE_DISABLED	(1ull << 56) // IBM bit 7
 
 #define H_CPU_BEHAV_FAVOUR_SECURITY	(1ull << 63) // IBM bit 0
 #define H_CPU_BEHAV_L1D_FLUSH_PR	(1ull << 62) // IBM bit 1
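The new H_CPU_CHAR_* flags above follow the IBM (big-endian, MSB-first) bit numbering called out in the comments: IBM bit n of a 64-bit word is (1ull << (63 - n)). A tiny standalone sketch of that mapping, using the value shown in the hunk:

#include <stdio.h>

/* IBM bit n of a 64-bit word, numbered from the most significant bit. */
#define IBM_BIT(n)	(1ull << (63 - (n)))

int main(void)
{
	/* IBM bit 5 == 1ull << 58, matching H_CPU_CHAR_BRANCH_HINTS_HONORED */
	printf("%d\n", IBM_BIT(5) == (1ull << 58));	/* prints 1 */
	return 0;
}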
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index ac6432d9be46..90c708e5e7c4 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -66,6 +66,7 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
 						unsigned long val, void *data);
 int arch_install_hw_breakpoint(struct perf_event *bp);
 void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void arch_unregister_hw_breakpoint(struct perf_event *bp);
 void hw_breakpoint_pmu_read(struct perf_event *bp);
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
 
@@ -82,6 +83,7 @@ static inline void hw_breakpoint_disable(void)
 	__set_breakpoint(&brk);
 }
 extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
+int hw_breakpoint_handler(struct die_args *args);
 
 #else	/* CONFIG_HAVE_HW_BREAKPOINT */
 static inline void hw_breakpoint_disable(void) { }
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index e8e3a0a04eb0..ee39ce56b2a2 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -66,6 +66,7 @@ extern void irq_ctx_init(void);
 extern void call_do_softirq(struct thread_info *tp);
 extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
 extern void do_IRQ(struct pt_regs *regs);
+extern void __init init_IRQ(void);
 extern void __do_irq(struct pt_regs *regs);
 
 int irq_choose_cpu(const struct cpumask *mask);
diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h
index c6d3078bd8c3..b8b0be8f1a07 100644
--- a/arch/powerpc/include/asm/irq_work.h
+++ b/arch/powerpc/include/asm/irq_work.h
@@ -6,5 +6,6 @@ static inline bool arch_irq_work_has_interrupt(void)
 {
 	return true;
 }
+extern void arch_irq_work_raise(void);
 
 #endif /* _ASM_POWERPC_IRQ_WORK_H */
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 09a802bb702f..a790d5cf6ea3 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -108,6 +108,8 @@
 
 /* book3s_hv */
 
+#define BOOK3S_INTERRUPT_HV_SOFTPATCH	0x1500
+
 /*
  * Special trap used to indicate to host that this is a
  * passthrough interrupt that could not be handled
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 376ae803b69c..4c02a7378d06 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -241,6 +241,10 @@ extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
 			unsigned long mask);
 extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
 
+extern int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu);
+extern int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu);
+extern void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu);
+
 extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
 extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
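On the epapr_hcalls.h hunk above: "register unsigned long r3 asm("r3")" is the conventional ordering of storage-class specifier before type, which is what the change restores. A minimal PowerPC-flavoured sketch of a register-pinned asm operand is below; it is illustrative only (GCC/clang extension, compiles for PPC targets, and add_one() is a made-up helper name):

/* Pin a local variable to GPR r3 and use it as an inline-asm operand. */
static inline unsigned long add_one(unsigned long x)
{
	register unsigned long r3 asm("r3") = x;

	asm volatile("addi %0,%0,1" : "+r"(r3));
	return r3;
}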
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 998f7b7aaa9e..c424e44f4c00 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -472,6 +472,49 @@ static inline void set_dirty_bits_atomic(unsigned long *map, unsigned long i,
 		set_bit_le(i, map);
 }
 
+static inline u64 sanitize_msr(u64 msr)
+{
+	msr &= ~MSR_HV;
+	msr |= MSR_ME;
+	return msr;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.cr = vcpu->arch.cr_tm;
+	vcpu->arch.xer = vcpu->arch.xer_tm;
+	vcpu->arch.lr = vcpu->arch.lr_tm;
+	vcpu->arch.ctr = vcpu->arch.ctr_tm;
+	vcpu->arch.amr = vcpu->arch.amr_tm;
+	vcpu->arch.ppr = vcpu->arch.ppr_tm;
+	vcpu->arch.dscr = vcpu->arch.dscr_tm;
+	vcpu->arch.tar = vcpu->arch.tar_tm;
+	memcpy(vcpu->arch.gpr, vcpu->arch.gpr_tm,
+	       sizeof(vcpu->arch.gpr));
+	vcpu->arch.fp = vcpu->arch.fp_tm;
+	vcpu->arch.vr = vcpu->arch.vr_tm;
+	vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
+}
+
+static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.cr_tm = vcpu->arch.cr;
+	vcpu->arch.xer_tm = vcpu->arch.xer;
+	vcpu->arch.lr_tm = vcpu->arch.lr;
+	vcpu->arch.ctr_tm = vcpu->arch.ctr;
+	vcpu->arch.amr_tm = vcpu->arch.amr;
+	vcpu->arch.ppr_tm = vcpu->arch.ppr;
+	vcpu->arch.dscr_tm = vcpu->arch.dscr;
+	vcpu->arch.tar_tm = vcpu->arch.tar;
+	memcpy(vcpu->arch.gpr_tm, vcpu->arch.gpr,
+	       sizeof(vcpu->arch.gpr));
+	vcpu->arch.fp_tm = vcpu->arch.fp;
+	vcpu->arch.vr_tm = vcpu->arch.vr;
+	vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index ab386af2904f..d978fdf698af 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -119,6 +119,7 @@ struct kvmppc_host_state {
 	u8 host_ipi;
 	u8 ptid;		/* thread number within subcore when split */
 	u8 tid;			/* thread number within whole core */
+	u8 fake_suspend;
 	struct kvm_vcpu *kvm_vcpu;
 	struct kvmppc_vcore *kvm_vcore;
 	void __iomem *xics_phys;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1f53b562726f..deb54293398c 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -610,6 +610,7 @@ struct kvm_vcpu_arch {
 	u64 tfhar;
 	u64 texasr;
 	u64 tfiar;
+	u64 orig_texasr;
 
 	u32 cr_tm;
 	u64 xer_tm;
diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 2f806e329648..4f547752ae79 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -186,11 +186,32 @@
 #define M_APG2		0x00000040
 #define M_APG3		0x00000060
 
+#ifdef CONFIG_PPC_MM_SLICES
+#include <asm/nohash/32/slice.h>
+#define SLICE_ARRAY_SIZE	(1 << (32 - SLICE_LOW_SHIFT - 1))
+#endif
+
 #ifndef __ASSEMBLY__
+struct slice_mask {
+	u64 low_slices;
+	DECLARE_BITMAP(high_slices, 0);
+};
+
 typedef struct {
 	unsigned int id;
 	unsigned int active;
 	unsigned long vdso_base;
+#ifdef CONFIG_PPC_MM_SLICES
+	u16 user_psize;		/* page size index */
+	unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
+	unsigned char high_slices_psize[0];
+	unsigned long slb_addr_limit;
+	struct slice_mask mask_base_psize; /* 4k or 16k */
+# ifdef CONFIG_HUGETLB_PAGE
+	struct slice_mask mask_512k;
+	struct slice_mask mask_8m;
+# endif
+#endif
 } mm_context_t;
 
 #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
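The sanitize_msr() helper added in kvm_book3s_64.h above forces a guest MSR into a safe shape: the hypervisor bit is cleared and machine-check-enable is set before the value is used for the guest. A standalone sketch follows; the MSR_HV/MSR_ME bit positions here are assumptions for illustration, not taken from this patch set.

#include <stdint.h>

#define MSR_HV	(1ull << 60)	/* assumed bit position */
#define MSR_ME	(1ull << 12)	/* assumed bit position */

static inline uint64_t sanitize_msr(uint64_t msr)
{
	msr &= ~MSR_HV;		/* the guest must not run in hypervisor state */
	msr |= MSR_ME;		/* machine checks must stay enabled */
	return msr;
}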
diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h
new file mode 100644
index 000000000000..777d62e40ac0
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/slice.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_SLICE_H
+#define _ASM_POWERPC_NOHASH_32_SLICE_H
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#define SLICE_LOW_SHIFT		26	/* 64 slices */
+#define SLICE_LOW_TOP		(0x100000000ull)
+#define SLICE_NUM_LOW		(SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
+
+#define SLICE_HIGH_SHIFT	0
+#define SLICE_NUM_HIGH		0ul
+#define GET_HIGH_SLICE_INDEX(addr)	(addr & 0)
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h
new file mode 100644
index 000000000000..ad0d6e3cc1c5
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/64/slice.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H
+#define _ASM_POWERPC_NOHASH_64_SLICE_H
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define get_slice_psize(mm, addr)	MMU_PAGE_64K
+#else /* CONFIG_PPC_64K_PAGES */
+#define get_slice_psize(mm, addr)	MMU_PAGE_4K
+#endif /* !CONFIG_PPC_64K_PAGES */
+#define slice_set_user_psize(mm, psize)	do { BUG(); } while (0)
+
+#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 94bd1bf2c873..d886a5b7ff21 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -204,7 +204,9 @@
 #define OPAL_NPU_SPA_SETUP			159
 #define OPAL_NPU_SPA_CLEAR_CACHE		160
 #define OPAL_NPU_TL_SET				161
-#define OPAL_LAST				161
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR		164
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR		165
+#define OPAL_LAST				165
 
 /* Device tree flags */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 12e70fb58700..dde60089d0d4 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -204,6 +204,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
 		uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index b62c31037cad..c97b41185ab7 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -32,6 +32,7 @@
 #include <asm/accounting.h>
 #include <asm/hmi.h>
 #include <asm/cpuidle.h>
+#include <asm/atomic.h>
 
 register struct paca_struct *local_paca asm("r13");
 
@@ -141,7 +142,7 @@ struct paca_struct {
 #ifdef CONFIG_PPC_BOOK3S
 	mm_context_id_t mm_ctx_id;
 #ifdef CONFIG_PPC_MM_SLICES
-	u64 mm_ctx_low_slices_psize;
+	unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
 	unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
 	unsigned long mm_ctx_slb_addr_limit;
 #else
@@ -177,6 +178,8 @@ struct paca_struct {
 	u8 thread_mask;
 	/* Mask to denote subcore sibling threads */
 	u8 subcore_sibling_mask;
+	/* Flag to request this thread not to stop */
+	atomic_t dont_stop;
 	/*
 	 * Pointer to an array which contains pointer
 	 * to the sibling threads' paca.
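In the paca.h hunk above, mm_ctx_low_slices_psize changes from a single u64 to an 8-byte array: conceptually, one 4-bit page-size index per 256MB low slice, 16 nibbles packed into 8 bytes. A standalone sketch of the packing this layout implies follows; the helper names (get_low_psize/set_low_psize) and the exact nibble convention are illustrative assumptions, not code from this series.

#include <stdio.h>

static unsigned char low_slices_psize[8];	/* 16 x 4-bit entries */

static unsigned int get_low_psize(unsigned int index)
{
	return (low_slices_psize[index >> 1] >> ((index & 1) * 4)) & 0xf;
}

static void set_low_psize(unsigned int index, unsigned int psize)
{
	unsigned char *p = &low_slices_psize[index >> 1];

	*p &= ~(0xf << ((index & 1) * 4));	/* clear the nibble */
	*p |= (psize & 0xf) << ((index & 1) * 4);
}

int main(void)
{
	set_low_psize(8, 4);			/* slice 8 -> page-size index 4 */
	printf("%u\n", get_low_psize(8));	/* prints 4 */
	return 0;
}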
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 8da5d4c1cab2..dec9ce5ba8af 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -126,7 +126,15 @@ extern long long virt_phys_offset;
 
 #ifdef CONFIG_FLATMEM
 #define ARCH_PFN_OFFSET		((unsigned long)(MEMORY_START >> PAGE_SHIFT))
-#define pfn_valid(pfn)		((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr)
+#ifndef __ASSEMBLY__
+extern unsigned long max_mapnr;
+static inline bool pfn_valid(unsigned long pfn)
+{
+	unsigned long min_pfn = ARCH_PFN_OFFSET;
+
+	return pfn >= min_pfn && pfn < max_mapnr;
+}
+#endif
 #endif
 
 #define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
@@ -344,5 +352,6 @@ typedef struct page *pgtable_t;
 
 #include <asm-generic/memory_model.h>
 #endif /* __ASSEMBLY__ */
+#include <asm/slice.h>
 
 #endif /* _ASM_POWERPC_PAGE_H */
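Why the pfn_valid() hunk above turns a macro into a static inline: the macro expanded its argument twice, so an argument with side effects was evaluated twice and the two comparisons could test different PFNs. A minimal illustration (not kernel code; the constants and next_pfn() helper are invented for the demo):

#include <stdbool.h>
#include <stdio.h>

#define ARCH_PFN_OFFSET	16UL
static unsigned long max_mapnr = 1024;

/* The old macro form: 'pfn' is expanded, and evaluated, twice. */
#define PFN_VALID_MACRO(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr)

static inline bool pfn_valid(unsigned long pfn)
{
	return pfn >= ARCH_PFN_OFFSET && pfn < max_mapnr;
}

static unsigned long next_pfn(void)
{
	static unsigned long pfn = 16;
	return pfn++;		/* side effect: advances on every call */
}

int main(void)
{
	/* The macro calls next_pfn() twice and so tests two different PFNs;
	 * the inline evaluates its argument exactly once. */
	printf("macro:  %d\n", PFN_VALID_MACRO(next_pfn()));
	printf("inline: %d\n", pfn_valid(next_pfn()));
	return 0;
}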
\ VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64) diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 723bf48e7494..67a8a9585d50 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -53,6 +53,8 @@ struct power_pmu { [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; + int n_blacklist_ev; + int *blacklist_ev; /* BHRB entries in the PMU */ int bhrb_nr; }; diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 55eddf50d149..96c1a46acbd0 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_PLPAR_WRAPPERS_H #define _ASM_POWERPC_PLPAR_WRAPPERS_H +#ifdef CONFIG_PPC_PSERIES + #include <linux/string.h> #include <linux/irqflags.h> @@ -9,14 +11,6 @@ #include <asm/paca.h> #include <asm/page.h> -/* Get state of physical CPU from query_cpu_stopped */ -int smp_query_cpu_stopped(unsigned int pcpu); -#define QCSS_STOPPED 0 -#define QCSS_STOPPING 1 -#define QCSS_NOT_STOPPED 2 -#define QCSS_HARDWARE_ERROR -1 -#define QCSS_HARDWARE_BUSY -2 - static inline long poll_pending(void) { return plpar_hcall_norets(H_POLL_PENDING); @@ -311,17 +305,17 @@ static inline long enable_little_endian_exceptions(void) return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0); } -static inline long plapr_set_ciabr(unsigned long ciabr) +static inline long plpar_set_ciabr(unsigned long ciabr) { return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0); } -static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) +static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) { return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); } -static inline long plapr_signal_sys_reset(long cpu) +static inline long plpar_signal_sys_reset(long cpu) { return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); } @@ -340,4 +334,12 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) return rc; } +#else /* !CONFIG_PPC_PSERIES */ + +static inline long plpar_set_ciabr(unsigned long ciabr) +{ + return 0; +} +#endif /* CONFIG_PPC_PSERIES */ + #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index 3e5cf251ad9a..d2d8c28db336 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -29,6 +29,12 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t state, extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc); +extern int pnv_pci_enable_tunnel(struct pci_dev *dev, uint64_t *asnind); +extern int pnv_pci_disable_tunnel(struct pci_dev *dev); +extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr, + int enable); +extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, + u32 *pid, u32 *tid); int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, unsigned int virq); diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h index dc5f6a5d4575..d1c2d2e658cf 100644 --- a/arch/powerpc/include/asm/powernv.h +++ b/arch/powerpc/include/asm/powernv.h @@ -40,6 +40,7 @@ static inline int pnv_npu2_handle_fault(struct npu_context *context, } static inline void pnv_tm_init(void) { } +static inline void pnv_power9_force_smt4(void) { } 
#endif #endif /* _ASM_POWERNV_H */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index f1083bcf449c..18883b8a6dac 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -232,6 +232,7 @@ #define PPC_INST_MSGSYNC 0x7c0006ec #define PPC_INST_MSGSNDP 0x7c00011c #define PPC_INST_MSGCLRP 0x7c00015c +#define PPC_INST_MTMSRD 0x7c000164 #define PPC_INST_MTTMR 0x7c0003dc #define PPC_INST_NOP 0x60000000 #define PPC_INST_PASTE 0x7c20070d @@ -239,8 +240,10 @@ #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_POPCNTD 0x7c0003f4 #define PPC_INST_POPCNTW 0x7c0002f4 +#define PPC_INST_RFEBB 0x4c000124 #define PPC_INST_RFCI 0x4c000066 #define PPC_INST_RFDI 0x4c00004e +#define PPC_INST_RFID 0x4c000024 #define PPC_INST_RFMCI 0x4c00004c #define PPC_INST_MFSPR 0x7c0002a6 #define PPC_INST_MFSPR_DSCR 0x7c1102a6 @@ -271,12 +274,14 @@ #define PPC_INST_TLBSRX_DOT 0x7c0006a5 #define PPC_INST_VPMSUMW 0x10000488 #define PPC_INST_VPMSUMD 0x100004c8 +#define PPC_INST_VPERMXOR 0x1000002d #define PPC_INST_XXLOR 0xf0000490 #define PPC_INST_XXSWAPD 0xf0000250 #define PPC_INST_XVCPSGNDP 0xf0000780 #define PPC_INST_TRECHKPT 0x7c0007dd #define PPC_INST_TRECLAIM 0x7c00075d #define PPC_INST_TABORT 0x7c00071d +#define PPC_INST_TSR 0x7c0005dd #define PPC_INST_NAP 0x4c000364 #define PPC_INST_SLEEP 0x4c0003a4 @@ -517,6 +522,11 @@ #define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_INST_XVCPSGNDP | \ VSX_XX3((t), (a), (b)))) +#define VPERMXOR(vrt, vra, vrb, vrc) \ + stringify_in_c(.long (PPC_INST_VPERMXOR | \ + ___PPC_RT(vrt) | ___PPC_RA(vra) | \ + ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) + #define PPC_NAP stringify_in_c(.long PPC_INST_NAP) #define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) #define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e6c7eadf6bce..cb0f272ce123 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -156,6 +156,8 @@ #define PSSCR_SD 0x00400000 /* Status Disable */ #define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */ #define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */ +#define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */ +#define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */ /* Floating Point Status and Control Register (FPSCR) Fields */ #define FPSCR_FX 0x80000000 /* FPU exception summary */ @@ -237,7 +239,12 @@ #define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ #define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ +#define TEXASR_ABORT __MASK(63-31) /* terminated by tabort or treclaim */ +#define TEXASR_SUSP __MASK(63-32) /* tx failed in suspended state */ +#define TEXASR_HV __MASK(63-34) /* MSR[HV] when failure occurred */ +#define TEXASR_PR __MASK(63-35) /* MSR[PR] when failure occurred */ #define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ +#define TEXASR_EXACT __MASK(63-37) /* TFIAR value is exact */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_TIDR 144 /* Thread ID register */ #define SPRN_CTRLF 0x088 diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h new file mode 100644 index 000000000000..400a9050e035 --- /dev/null +++ b/arch/powerpc/include/asm/security_features.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Security related feature bit 
definitions. + * + * Copyright 2018, Michael Ellerman, IBM Corporation. + */ + +#ifndef _ASM_POWERPC_SECURITY_FEATURES_H +#define _ASM_POWERPC_SECURITY_FEATURES_H + + +extern unsigned long powerpc_security_features; +extern bool rfi_flush; + +static inline void security_ftr_set(unsigned long feature) +{ + powerpc_security_features |= feature; +} + +static inline void security_ftr_clear(unsigned long feature) +{ + powerpc_security_features &= ~feature; +} + +static inline bool security_ftr_enabled(unsigned long feature) +{ + return !!(powerpc_security_features & feature); +} + + +// Features indicating support for Spectre/Meltdown mitigations + +// The L1-D cache can be flushed with ori r30,r30,0 +#define SEC_FTR_L1D_FLUSH_ORI30 0x0000000000000001ull + +// The L1-D cache can be flushed with mtspr 882,r0 (aka SPRN_TRIG2) +#define SEC_FTR_L1D_FLUSH_TRIG2 0x0000000000000002ull + +// ori r31,r31,0 acts as a speculation barrier +#define SEC_FTR_SPEC_BAR_ORI31 0x0000000000000004ull + +// Speculation past bctr is disabled +#define SEC_FTR_BCCTRL_SERIALISED 0x0000000000000008ull + +// Entries in L1-D are private to a SMT thread +#define SEC_FTR_L1D_THREAD_PRIV 0x0000000000000010ull + +// Indirect branch prediction cache disabled +#define SEC_FTR_COUNT_CACHE_DISABLED 0x0000000000000020ull + + +// Features indicating need for Spectre/Meltdown mitigations + +// The L1-D cache should be flushed on MSR[HV] 1->0 transition (hypervisor to guest) +#define SEC_FTR_L1D_FLUSH_HV 0x0000000000000040ull + +// The L1-D cache should be flushed on MSR[PR] 0->1 transition (kernel to userspace) +#define SEC_FTR_L1D_FLUSH_PR 0x0000000000000080ull + +// A speculation barrier should be used for bounds checks (Spectre variant 1) +#define SEC_FTR_BNDS_CHK_SPEC_BAR 0x0000000000000100ull + +// Firmware configuration indicates user favours security over performance +#define SEC_FTR_FAVOUR_SECURITY 0x0000000000000200ull + +#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 469b7fdc9be4..bbcdf929be54 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -49,7 +49,7 @@ enum l1d_flush_type { L1D_FLUSH_MTTRIG = 0x8, }; -void __init setup_rfi_flush(enum l1d_flush_type, bool enable); +void setup_rfi_flush(enum l1d_flush_type, bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h new file mode 100644 index 000000000000..e40406cf5628 --- /dev/null +++ b/arch/powerpc/include/asm/slice.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_SLICE_H +#define _ASM_POWERPC_SLICE_H + +#ifdef CONFIG_PPC_BOOK3S_64 +#include <asm/book3s/64/slice.h> +#elif defined(CONFIG_PPC64) +#include <asm/nohash/64/slice.h> +#elif defined(CONFIG_PPC_MMU_NOHASH) +#include <asm/nohash/32/slice.h> +#endif + +#ifdef CONFIG_PPC_MM_SLICES + +#ifdef CONFIG_HUGETLB_PAGE +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#endif +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + +#ifndef __ASSEMBLY__ + +struct mm_struct; + +unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, + unsigned long flags, unsigned int psize, + int topdown); + +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr); + +void slice_set_range_psize(struct mm_struct *mm, unsigned long start, + unsigned long len, unsigned int psize); + +void slice_init_new_context_exec(struct 
mm_struct *mm); + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_PPC_MM_SLICES */ + +#endif /* _ASM_POWERPC_SLICE_H */ diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index c3ca42cdc9f5..be8c9fa23983 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -35,7 +35,6 @@ static inline void disable_kernel_fp(void) msr_check_and_clear(MSR_FP); } #else -static inline void __giveup_fpu(struct task_struct *t) { } static inline void save_fpu(struct task_struct *t) { } static inline void flush_fp_to_thread(struct task_struct *t) { } #endif diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 4a12c00f8de3..5964145db03d 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -70,6 +70,7 @@ static inline struct thread_info *current_thread_info(void) return (struct thread_info *)val; } +extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #endif /* __ASSEMBLY__ */ /* diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index b240666b7bc1..db546c034905 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -31,6 +31,7 @@ extern void to_tm(int tim, struct rtc_time * tm); extern void tick_broadcast_ipi_handler(void); extern void generic_calibrate_decr(void); +extern void hdec_interrupt(struct pt_regs *regs); /* Some sane defaults: 125 MHz timebase, 1GHz processor */ extern unsigned long ppc_proc_freq; @@ -46,7 +47,7 @@ struct div_result { /* Accessor functions for the timebase (RTC on 601) registers. */ /* If one day CONFIG_POWER is added just define __USE_RTC as 1 */ #ifdef CONFIG_6xx -#define __USE_RTC() (!cpu_has_feature(CPU_FTR_USE_TB)) +#define __USE_RTC() (cpu_has_feature(CPU_FTR_USE_RTC)) #else #define __USE_RTC() 0 #endif @@ -204,6 +205,7 @@ struct cpu_usage { DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); extern void secondary_cpu_time_init(void); +extern void __init time_init(void); DECLARE_PER_CPU(u64, decrementers_next_tb); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 51bfeb8777f0..a62ee663b2c8 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -47,9 +47,13 @@ #else -#define __access_ok(addr, size, segment) \ - (((addr) <= (segment).seg) && \ - (((size) == 0) || (((size) - 1) <= ((segment).seg - (addr))))) +static inline int __access_ok(unsigned long addr, unsigned long size, + mm_segment_t seg) +{ + if (addr > seg.seg) + return 0; + return (size == 0 || size - 1 <= seg.seg - addr); +} #endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1b6bc7fba996..d458c45e5004 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -42,7 +42,7 @@ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o -obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o security.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index ea5eb91b836e..daf809a9b88e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -568,6 +568,7 @@ int 
main(void) OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar); OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar); OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr); + OFFSET(VCPU_ORIG_TEXASR, kvm_vcpu, arch.orig_texasr); OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm); OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr); OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr); @@ -650,6 +651,7 @@ int main(void) HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_TID, tid); + HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]); HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]); HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]); @@ -759,6 +761,7 @@ int main(void) OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas); OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr); + OFFSET(PACA_DONT_STOP, paca_struct, dont_stop); #define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f) STOP_SPR(STOP_PID, pid); STOP_SPR(STOP_LDBAR, ldbar); diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index c5e5a94d9892..a9f3970693e1 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -226,7 +226,7 @@ BEGIN_FTR_SECTION beq 1f END_FTR_SECTION_IFSET(CPU_FTR_L3CR) lwz r6,CPU_SPEC_FEATURES(r4) - andi. r0,r6,CPU_FTR_L3_DISABLE_NAP + andis. r0,r6,CPU_FTR_L3_DISABLE_NAP@h beq 1f li r7,CPU_FTR_CAN_NAP andc r6,r6,r7 diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 462aed9bcf51..8d142e5d84cd 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -162,7 +162,7 @@ _GLOBAL(__setup_cpu_e5500) * the feature on the primary core, avoid doing it on the * secondary core. */ - andis. r6, r3, CPU_FTR_EMB_HV@h + andi. 
r6, r3, CPU_FTR_EMB_HV beq 2f rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV stw r3, CPU_SPEC_FEATURES(r4) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c40a9fc1e5d1..b3de017bcd71 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -553,11 +553,30 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, - { /* Power9 DD 2.1 or later (see DD2.0 above) */ + { /* Power9 DD 2.1 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0201, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD2_1, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power9", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .machine_check_early = __machine_check_early_realmode_p9, + .platform = "power9", + }, + { /* Power9 DD2.2 or later */ .pvr_mask = 0xffff0000, .pvr_value = 0x004e0000, .cpu_name = "POWER9 (raw)", - .cpu_features = CPU_FTRS_POWER9_DD2_1, + .cpu_features = CPU_FTRS_POWER9_DD2_2, .cpu_user_features = COMMON_USER_POWER9, .cpu_user_features2 = COMMON_USER2_POWER9, .mmu_features = MMU_FTRS_POWER9, diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 8ca5d5b74618..4313ff07edca 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -54,8 +54,7 @@ struct dt_cpu_feature { }; #define CPU_FTRS_BASE \ - (CPU_FTR_USE_TB | \ - CPU_FTR_LWSYNC | \ + (CPU_FTR_LWSYNC | \ CPU_FTR_FPU_UNAVAILABLE |\ CPU_FTR_NODSISRALIGN |\ CPU_FTR_NOEXECUTE |\ @@ -590,6 +589,8 @@ static struct dt_cpu_feature_match __initdata {"virtual-page-class-key-protection", feat_enable, 0}, {"transactional-memory", feat_enable_tm, CPU_FTR_TM}, {"transactional-memory-v3", feat_enable_tm, 0}, + {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST}, + {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG}, {"idle-nap", feat_enable_idle_nap, 0}, {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0}, {"idle-stop", feat_enable_idle_stop, 0}, @@ -709,9 +710,14 @@ static __init void cpufeatures_cpu_quirks(void) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; else if ((version & 0xffffefff) == 0x004e0201) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; + else if ((version & 0xffffefff) == 0x004e0202) + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST | + CPU_FTR_P9_TM_XER_SO_BUG; - if ((version & 0xffff0000) == 0x004e0000) + if ((version & 0xffff0000) == 0x004e0000) { + cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR); cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + } } static void __init cpufeatures_setup_finished(void) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 2b9df0040d6b..bc640e4c5ca5 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -394,9 +394,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) /* Check PHB state */ ret = eeh_ops->get_state(phb_pe, NULL); if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || - (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { + (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { ret = 0; goto out; } @@ -433,7 +431,6 @@ out: int eeh_dev_check_failure(struct eeh_dev *edev) { int ret; 
- int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); unsigned long flags; struct device_node *dn; struct pci_dev *dev; @@ -525,8 +522,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * state, PE is in good state. */ if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || - ((ret & active_flags) == active_flags)) { + (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; @@ -546,8 +542,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) /* Frozen parent PE ? */ ret = eeh_ops->get_state(parent_pe, NULL); - if (ret > 0 && - (ret & active_flags) != active_flags) + if (ret > 0 && !eeh_state_active(ret)) pe = parent_pe; /* Next parent level */ @@ -888,7 +883,6 @@ static void *eeh_set_dev_freset(void *data, void *flag) */ int eeh_pe_reset_full(struct eeh_pe *pe) { - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); int type = EEH_RESET_HOT; unsigned int freset = 0; @@ -919,7 +913,7 @@ int eeh_pe_reset_full(struct eeh_pe *pe) /* Wait until the PE is in a functioning state */ state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if ((state & active_flags) == active_flags) + if (eeh_state_active(state)) break; if (state < 0) { @@ -1352,16 +1346,15 @@ static int eeh_pe_change_owner(struct eeh_pe *pe) struct eeh_dev *edev, *tmp; struct pci_dev *pdev; struct pci_device_id *id; - int flags, ret; + int ret; /* Check PE state */ - flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); ret = eeh_ops->get_state(pe, NULL); if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) return 0; /* Unfrozen PE, nothing to do */ - if ((ret & flags) == flags) + if (eeh_state_active(ret)) return 0; /* Frozen PE, check if it needs PE level reset */ diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index d4cc26618809..201943d54a6e 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -84,8 +84,7 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr) * @addr: mmio (PIO) phys address or i/o port number * * Given an mmio phys address, or a port number, find a pci device - * that implements this address. Be sure to pci_dev_put the device - * when finished. I/O port numbers are assumed to be offset + * that implements this address. I/O port numbers are assumed to be offset * from zero (that is, they do *not* have pci_io_addr added in). * It is safe to call this function within an interrupt. */ diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 0c0b66fc5bfb..43ceb6263cd8 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -619,17 +619,19 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) /** * eeh_reset_device - Perform actual reset of a pci slot + * @driver_eeh_aware: Does the device's driver provide EEH support? * @pe: EEH PE * @bus: PCI bus corresponding to the isolated slot + * @rmv_data: Optional, list to record removed devices * * This routine must be called to do reset on the indicated PE. * During the reset, udev might be invoked because those affected * PCI devices will be removed and then added.
*/ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, - struct eeh_rmv_data *rmv_data) + struct eeh_rmv_data *rmv_data, + bool driver_eeh_aware) { - struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); time64_t tstamp; int cnt, rc; struct eeh_dev *edev; @@ -645,16 +647,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * into pci_hp_add_devices(). */ eeh_pe_state_mark(pe, EEH_PE_KEEP); - if (bus) { - if (pe->type & EEH_PE_VF) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); - } else { - pci_lock_rescan_remove(); - pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); - } - } else if (frozen_bus) { + if (driver_eeh_aware || (pe->type & EEH_PE_VF)) { eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); + } else { + pci_lock_rescan_remove(); + pci_hp_remove_devices(bus); + pci_unlock_rescan_remove(); } /* @@ -689,8 +687,9 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * the device up before the scripts have taken it down, * potentially weird things happen. */ - if (bus) { - pr_info("EEH: Sleep 5s ahead of complete hotplug\n"); + if (!driver_eeh_aware || rmv_data->removed) { + pr_info("EEH: Sleep 5s ahead of %s hotplug\n", + (driver_eeh_aware ? "partial" : "complete")); ssleep(5); /* @@ -703,19 +702,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (pe->type & EEH_PE_VF) { eeh_add_virt_device(edev, NULL); } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + if (!driver_eeh_aware) + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_hp_add_devices(bus); } - } else if (frozen_bus && rmv_data->removed) { - pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); - ssleep(5); - - edev = list_first_entry(&pe->edevs, struct eeh_dev, list); - eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - if (pe->type & EEH_PE_VF) - eeh_add_virt_device(edev, NULL); - else - pci_hp_add_devices(frozen_bus); } eeh_pe_state_clear(pe, EEH_PE_KEEP); @@ -733,28 +723,42 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, /** * eeh_handle_normal_event - Handle EEH events on a specific PE - * @pe: EEH PE + * @pe: EEH PE - which should not be used after we return, as it may + * have been invalidated. * * Attempts to recover the given PE. If recovery fails or the PE has failed * too many times, remove the PE. * - * Returns true if @pe should no longer be used, else false. + * When the PHB detects address or data parity errors on a particular + * PCI slot, the associated PE will be frozen. Besides, DMAs occurring + * to wild addresses (which usually happen due to bugs in device + * drivers or in PCI adapter firmware) can cause an EEH error. #SERR, + * #PERR or other misc PCI-related errors can also trigger EEH errors. + * + * The recovery process consists of unplugging the device driver (which + * generated hotplug events to userspace), then issuing a PCI #RST to + * the device, then reconfiguring the PCI config space for all bridges + * & devices under this slot, and then finally restarting the device + * drivers (which cause a second set of hotplug events to go out to + * userspace).
*/ -static bool eeh_handle_normal_event(struct eeh_pe *pe) +void eeh_handle_normal_event(struct eeh_pe *pe) { - struct pci_bus *frozen_bus; + struct pci_bus *bus; struct eeh_dev *edev, *tmp; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; - frozen_bus = eeh_pe_bus_get(pe); - if (!frozen_bus) { + bus = eeh_pe_bus_get(pe); + if (!bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); - return false; + return; } + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + eeh_pe_update_time_stamp(pe); pe->freeze_count++; if (pe->freeze_count > eeh_max_freezes) { @@ -806,7 +810,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe) */ if (result == PCI_ERS_RESULT_NONE) { pr_info("EEH: Reset with hotplug activity\n"); - rc = eeh_reset_device(pe, frozen_bus, NULL); + rc = eeh_reset_device(pe, bus, NULL, false); if (rc) { pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); @@ -858,7 +862,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe) /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { pr_info("EEH: Reset without hotplug activity\n"); - rc = eeh_reset_device(pe, NULL, &rmv_data); + rc = eeh_reset_device(pe, bus, &rmv_data, true); if (rc) { pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); @@ -891,7 +895,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); - return false; + goto final; hard_fail: /* @@ -916,23 +920,21 @@ hard_fail: * all removed devices correctly to avoid accessing * their PCI config any more. */ - if (frozen_bus) { - if (pe->type & EEH_PE_VF) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - - pci_lock_rescan_remove(); - pci_hp_remove_devices(frozen_bus); - pci_unlock_rescan_remove(); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - /* The passed PE should no longer be used */ - return true; - } + pci_lock_rescan_remove(); + pci_hp_remove_devices(bus); + pci_unlock_rescan_remove(); + /* The passed PE should no longer be used */ + return; } - return false; +final: + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } /** @@ -942,7 +944,7 @@ hard_fail: * specific PE. Iterates through possible failures and handles them as * necessary. */ -static void eeh_handle_special_event(void) +void eeh_handle_special_event(void) { struct eeh_pe *pe, *phb_pe; struct pci_bus *bus; @@ -1005,15 +1007,7 @@ static void eeh_handle_special_event(void) */ if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { - /* - * eeh_handle_normal_event() can make the PE stale if it - * determines that the PE cannot possibly be recovered. - * Don't modify the PE state if that's the case. - */ - if (eeh_handle_normal_event(pe)) - continue; - - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + eeh_handle_normal_event(pe); } else { pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { @@ -1049,28 +1043,3 @@ static void eeh_handle_special_event(void) break; } while (rc != EEH_NEXT_ERR_NONE); } - -/** - * eeh_handle_event - Reset a PCI device after hard lockup.
- * @pe: EEH PE - * - * While PHB detects address or data parity errors on particular PCI - * slot, the associated PE will be frozen. Besides, DMA's occurring - * to wild addresses (which usually happen due to bugs in device - * drivers or in PCI adapter firmware) can cause EEH error. #SERR, - * #PERR or other misc PCI-related errors also can trigger EEH errors. - * - * Recovery process consists of unplugging the device driver (which - * generated hotplug events to userspace), then issuing a PCI #RST to - * the device, then reconfiguring the PCI config space for all bridges - * & devices under this slot, and then finally restarting the device - * drivers (which cause a second set of hotplug events to go out to - * userspace). - */ -void eeh_handle_event(struct eeh_pe *pe) -{ - if (pe) - eeh_handle_normal_event(pe); - else - eeh_handle_special_event(); -} diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index accbf8b5fd46..61c9356bf9c9 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -73,7 +73,6 @@ static int eeh_event_handler(void * dummy) /* We might have event without binding PE */ pe = event->pe; if (pe) { - eeh_pe_state_mark(pe, EEH_PE_RECOVERING); if (pe->type & EEH_PE_PHB) pr_info("EEH: Detected error on PHB#%x\n", pe->phb->global_number); @@ -81,10 +80,9 @@ static int eeh_event_handler(void * dummy) pr_info("EEH: Detected PCI bus error on " "PHB#%x-PE#%x\n", pe->phb->global_number, pe->addr); - eeh_handle_event(pe); - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + eeh_handle_normal_event(pe); } else { - eeh_handle_event(NULL); + eeh_handle_special_event(); } kfree(event); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1ecfd8ffb098..6bee20c43feb 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1273,7 +1273,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) bne+ denorm_assist #endif - KVMTEST_PR(0x1500) + KVMTEST_HV(0x1500) EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100) @@ -1285,7 +1285,7 @@ EXC_VIRT_END(denorm_exception, 0x5500, 0x100) EXC_VIRT_NONE(0x5500, 0x100) #endif -TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500) +TRAMP_KVM_HV(PACA_EXGEN, 0x1500) #ifdef CONFIG_PPC_DENORMALISATION TRAMP_REAL_BEGIN(denorm_assist) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 53b9c1dfd7d9..4c1012b80d3b 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -33,6 +33,7 @@ #include <asm/hw_breakpoint.h> #include <asm/processor.h> #include <asm/sstep.h> +#include <asm/debug.h> #include <linux/uaccess.h> /* @@ -171,6 +172,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the * 'symbolsize' should satisfy the check below. 
*/ + if (!ppc_breakpoint_available()) + return -ENODEV; length_max = 8; /* DABR */ if (cpu_has_feature(CPU_FTR_DAWR)) { length_max = 512 ; /* 64 doublewords */ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 01e1c1997893..89157cf452e3 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -339,6 +339,7 @@ power_enter_stop: bne .Lhandle_esl_ec_set PPC_STOP li r3,0 /* Since we didn't lose state, return 0 */ + std r3, PACA_REQ_PSSCR(r13) /* * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so @@ -429,11 +430,29 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ * r3 contains desired PSSCR register value. */ _GLOBAL(power9_idle_stop) +BEGIN_FTR_SECTION + lwz r5, PACA_DONT_STOP(r13) + cmpwi r5, 0 + bne 1f std r3, PACA_REQ_PSSCR(r13) + sync + lwz r5, PACA_DONT_STOP(r13) + cmpwi r5, 0 + bne 1f +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) mtspr SPRN_PSSCR,r3 LOAD_REG_ADDR(r4,power_enter_stop) b pnv_powersave_common /* No return */ +1: + /* + * We get here when TM / thread reconfiguration bug workaround + * code wants to get the CPU into SMT4 mode, and therefore + * we are being asked not to stop. + */ + li r3, 0 + std r3, PACA_REQ_PSSCR(r13) + blr /* return 0 for wakeup cause / SRR1 value */ /* * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1, @@ -584,6 +603,8 @@ FTR_SECTION_ELSE_NESTED(71) mfspr r5, SPRN_PSSCR rldicl r5,r5,4,60 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71) + li r0, 0 /* clear requested_psscr to say we're awake */ + std r0, PACA_REQ_PSSCR(r13) cmpd cr4,r5,r4 bge cr4,pnv_wakeup_tb_loss /* returns to caller */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 3280953a82cf..fa267e94090a 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -144,44 +144,6 @@ _GLOBAL_TOC(flush_dcache_range) blr EXPORT_SYMBOL(flush_dcache_range) -/* - * Like above, but works on non-mapped physical addresses. - * Use only for non-LPAR setups ! It also assumes real mode - * is cacheable. Used for flushing out the DART before using - * it as uncacheable memory - * - * flush_dcache_phys_range(unsigned long start, unsigned long stop) - * - * flush all bytes from start to stop-1 inclusive - */ -_GLOBAL(flush_dcache_phys_range) - ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? 
*/ - mfmsr r5 /* Disable MMU Data Relocation */ - ori r0,r5,MSR_DR - xori r0,r0,MSR_DR - sync - mtmsr r0 - sync - isync - mtctr r8 -0: dcbst 0,r6 - add r6,r6,r7 - bdnz 0b - sync - isync - mtmsr r5 /* Re-enable MMU Data Relocation */ - sync - isync - blr - _GLOBAL(flush_inval_dcache_range) ld r10,PPC64_CACHES@toc(r2) lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 496d6393bd41..ba681dac7b46 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -207,8 +207,7 @@ int nvram_write_os_partition(struct nvram_os_partition *part, tmp_index = part->index; - rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), - &tmp_index); + rc = ppc_md.nvram_write((char *)&info, sizeof(info), &tmp_index); if (rc <= 0) { pr_err("%s: Failed nvram_write (%d)\n", __func__, rc); return rc; @@ -244,9 +243,7 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff, tmp_index = part->index; if (part->os_partition) { - rc = ppc_md.nvram_read((char *)&info, - sizeof(struct err_log_info), - &tmp_index); + rc = ppc_md.nvram_read((char *)&info, sizeof(info), &tmp_index); if (rc <= 0) { pr_err("%s: Failed nvram_read (%d)\n", __func__, rc); return rc; @@ -1173,7 +1170,7 @@ int __init nvram_scan_partitions(void) "detected: 0-length partition\n"); goto out; } - tmp_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + tmp_part = kmalloc(sizeof(*tmp_part), GFP_KERNEL); err = -ENOMEM; if (!tmp_part) { printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n"); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 95ffedf14885..2fd563d05831 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -265,7 +265,8 @@ void copy_mm_to_paca(struct mm_struct *mm) #ifdef CONFIG_PPC_MM_SLICES VM_BUG_ON(!mm->context.slb_addr_limit); get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit; - get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize; + memcpy(&get_paca()->mm_ctx_low_slices_psize, + &context->low_slices_psize, sizeof(context->low_slices_psize)); memcpy(&get_paca()->mm_ctx_high_slices_psize, &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); #else /* CONFIG_PPC_MM_SLICES */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1738c4127b32..24a591b4dbe9 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -173,7 +173,7 @@ void __msr_check_and_clear(unsigned long bits) EXPORT_SYMBOL(__msr_check_and_clear); #ifdef CONFIG_PPC_FPU -void __giveup_fpu(struct task_struct *tsk) +static void __giveup_fpu(struct task_struct *tsk) { unsigned long msr; @@ -556,7 +556,7 @@ void restore_math(struct pt_regs *regs) regs->msr = msr; } -void save_all(struct task_struct *tsk) +static void save_all(struct task_struct *tsk) { unsigned long usermsr; @@ -827,6 +827,18 @@ void set_breakpoint(struct arch_hw_breakpoint *brk) preempt_enable(); } +/* Check if we have DAWR or DABR hardware */ +bool ppc_breakpoint_available(void) +{ + if (cpu_has_feature(CPU_FTR_DAWR)) + return true; /* POWER8 DAWR */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return false; /* POWER9 with DAWR disabled */ + /* DABR: Everything but POWER8 and POWER9 */ + return true; +} +EXPORT_SYMBOL_GPL(ppc_breakpoint_available); + #ifdef CONFIG_PPC64 DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); #endif diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 4dffef947b8a..330c65f04820 100644 --- 
a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -291,11 +291,11 @@ static inline void identical_pvr_fixup(unsigned long node) static void __init check_cpu_feature_properties(unsigned long node) { - unsigned long i; + int i; struct feature_property *fp = feature_properties; const __be32 *prop; - for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) { + for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) { prop = of_get_flat_dt_prop(node, fp->name, NULL); if (prop && be32_to_cpup(prop) >= fp->min_value) { cur_cpu_spec->cpu_features |= fp->cpu_feature; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index acf4b2e0530c..0323e073341d 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1110,7 +1110,8 @@ static void __init prom_check_platform_support(void) } } - if (supported.radix_mmu && supported.radix_gtse) { + if (supported.radix_mmu && supported.radix_gtse && + IS_ENABLED(CONFIG_PPC_RADIX_MMU)) { /* Radix preferred - but we require GTSE for now */ prom_debug("Asking for radix with GTSE\n"); ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index ca72d7391d40..d23cf632edf0 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -41,6 +41,7 @@ #include <asm/switch_to.h> #include <asm/tm.h> #include <asm/asm-prototypes.h> +#include <asm/debug.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -2378,6 +2379,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, struct perf_event_attr attr; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #ifndef CONFIG_PPC_ADV_DEBUG_REGS + bool set_bp = true; struct arch_hw_breakpoint hw_brk; #endif @@ -2411,9 +2413,10 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, hw_brk.address = data & (~HW_BRK_TYPE_DABR); hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; hw_brk.len = 8; + set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); #ifdef CONFIG_HAVE_HW_BREAKPOINT bp = thread->ptrace_bps[0]; - if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) { + if (!set_bp) { if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; @@ -2450,6 +2453,9 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, return PTR_ERR(bp); } +#else /* !CONFIG_HAVE_HW_BREAKPOINT */ + if (set_bp && (!ppc_breakpoint_available())) + return -ENODEV; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ task->thread.hw_brk = hw_brk; #else /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -2904,6 +2910,9 @@ static long ppc_set_hwdebug(struct task_struct *child, if (child->thread.hw_brk.address) return -ENOSPC; + if (!ppc_breakpoint_available()) + return -ENODEV; + child->thread.hw_brk = brk; return 1; @@ -3052,7 +3061,10 @@ long arch_ptrace(struct task_struct *child, long request, #endif #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ dbginfo.num_instruction_bps = 0; - dbginfo.num_data_bps = 1; + if (ppc_breakpoint_available()) + dbginfo.num_data_bps = 1; + else + dbginfo.num_data_bps = 0; dbginfo.num_condition_regs = 0; #ifdef CONFIG_PPC64 dbginfo.data_bp_alignment = 8; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c new file mode 100644 index 000000000000..2cee3dcd231b --- /dev/null +++ b/arch/powerpc/kernel/security.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Security related flags and so on. +// +// Copyright 2018, Michael Ellerman, IBM Corporation. 
+ +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/seq_buf.h> + +#include <asm/security_features.h> + + +unsigned long powerpc_security_features __read_mostly = \ + SEC_FTR_L1D_FLUSH_HV | \ + SEC_FTR_L1D_FLUSH_PR | \ + SEC_FTR_BNDS_CHK_SPEC_BAR | \ + SEC_FTR_FAVOUR_SECURITY; + + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + bool thread_priv; + + thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV); + + if (rfi_flush || thread_priv) { + struct seq_buf s; + seq_buf_init(&s, buf, PAGE_SIZE - 1); + + seq_buf_printf(&s, "Mitigation: "); + + if (rfi_flush) + seq_buf_printf(&s, "RFI Flush"); + + if (rfi_flush && thread_priv) + seq_buf_printf(&s, ", "); + + if (thread_priv) + seq_buf_printf(&s, "L1D private per thread"); + + seq_buf_printf(&s, "\n"); + + return s.len; + } + + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && + !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) +{ + bool bcs, ccd, ori; + struct seq_buf s; + + seq_buf_init(&s, buf, PAGE_SIZE - 1); + + bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED); + ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED); + ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31); + + if (bcs || ccd) { + seq_buf_printf(&s, "Mitigation: "); + + if (bcs) + seq_buf_printf(&s, "Indirect branch serialisation (kernel only)"); + + if (bcs && ccd) + seq_buf_printf(&s, ", "); + + if (ccd) + seq_buf_printf(&s, "Indirect branch cache disabled"); + } else + seq_buf_printf(&s, "Vulnerable"); + + if (ori) + seq_buf_printf(&s, ", ori31 speculation barrier enabled"); + + seq_buf_printf(&s, "\n"); + + return s.len; +} diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index d73ec518ef80..a6002f9449b1 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -919,6 +919,8 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_PPC64 if (!radix_enabled()) init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; +#elif defined(CONFIG_PPC_8xx) + init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW; #else #error "context.addr_limit not initialized." #endif diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 51ebc01fff52..74457485574b 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -39,6 +39,7 @@ #include <asm/udbg.h> #include <asm/code-patching.h> #include <asm/cpu_has_feature.h> +#include <asm/asm-prototypes.h> #define DBG(fmt...) 
@@ -121,7 +122,7 @@ notrace void __init machine_init(u64 dt_ptr) } /* Checks "l2cr=xxxx" command-line option */ -int __init ppc_setup_l2cr(char *str) +static int __init ppc_setup_l2cr(char *str) { if (cpu_has_feature(CPU_FTR_L2CR)) { unsigned long val = simple_strtoul(str, NULL, 0); @@ -134,7 +135,7 @@ int __init ppc_setup_l2cr(char *str) __setup("l2cr=", ppc_setup_l2cr); /* Checks "l3cr=xxxx" command-line option */ -int __init ppc_setup_l3cr(char *str) +static int __init ppc_setup_l3cr(char *str) { if (cpu_has_feature(CPU_FTR_L3CR)) { unsigned long val = simple_strtoul(str, NULL, 0); @@ -180,7 +181,7 @@ EXPORT_SYMBOL(nvram_sync); #endif /* CONFIG_NVRAM */ -int __init ppc_init(void) +static int __init ppc_init(void) { /* clear the progress line */ if (ppc_md.progress) @@ -192,7 +193,6 @@ int __init ppc_init(void) } return 0; } - arch_initcall(ppc_init); void __init irqstack_early_init(void) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index c388cc3357fa..7f7621668613 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -846,9 +846,6 @@ static void do_nothing(void *unused) void rfi_flush_enable(bool enable) { - if (rfi_flush == enable) - return; - if (enable) { do_rfi_flush_fixups(enabled_flush_types); on_each_cpu(do_nothing, NULL, 1); @@ -863,6 +860,10 @@ static void init_fallback_flush(void) u64 l1d_size, limit; int cpu; + /* Only allocate the fallback flush area once (at boot time). */ + if (l1d_flush_fallback_area) + return; + l1d_size = ppc64_caches.l1d.size; limit = min(ppc64_bolted_size(), ppc64_rma_size); @@ -880,18 +881,18 @@ static void init_fallback_flush(void) } } -void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) +void setup_rfi_flush(enum l1d_flush_type types, bool enable) { if (types & L1D_FLUSH_FALLBACK) { - pr_info("rfi-flush: Using fallback displacement flush\n"); + pr_info("rfi-flush: fallback displacement flush available\n"); init_fallback_flush(); } if (types & L1D_FLUSH_ORI) - pr_info("rfi-flush: Using ori type flush\n"); + pr_info("rfi-flush: ori type flush available\n"); if (types & L1D_FLUSH_MTTRIG) - pr_info("rfi-flush: Using mttrig type flush\n"); + pr_info("rfi-flush: mttrig type flush available\n"); enabled_flush_types = types; @@ -902,13 +903,19 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { + bool enable; + if (val == 1) - rfi_flush_enable(true); + enable = true; else if (val == 0) - rfi_flush_enable(false); + enable = false; else return -EINVAL; + /* Only do anything if we're changing state */ + if (enable != rfi_flush) + rfi_flush_enable(enable); + return 0; } @@ -927,12 +934,4 @@ static __init int rfi_flush_debugfs_init(void) } device_initcall(rfi_flush_debugfs_init); #endif - -ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) -{ - if (rfi_flush) - return sprintf(buf, "Mitigation: RFI Flush\n"); - - return sprintf(buf, "Vulnerable\n"); -} #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 7c59d88b9d86..a6467f843acf 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -49,6 +49,11 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #else /* CONFIG_PPC64 */ +extern long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, + struct pt_regs *regs); +extern long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, + struct 
pt_regs *regs); + static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct task_struct *tsk) { diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index a46de0035214..492f03451877 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1045,7 +1045,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx, struct ucontext __user *new_ctx, int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) { - unsigned char tmp; + unsigned char tmp __maybe_unused; int ctx_has_vsx_region = 0; #ifdef CONFIG_PPC64 @@ -1231,7 +1231,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, { struct sig_dbg_op op; int i; - unsigned char tmp; + unsigned char tmp __maybe_unused; unsigned long new_msr = regs->msr; #ifdef CONFIG_PPC_ADV_DEBUG_REGS unsigned long new_dbcr0 = current->thread.debug.dbcr0; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a32823dcd9a4..f7d96a68ecaa 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -1234,7 +1234,7 @@ void calibrate_delay(void) static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm) { ppc_md.get_rtc_time(tm); - return rtc_valid_tm(tm); + return 0; } static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 22b01a3962f0..b44ec104a5a1 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -99,26 +99,28 @@ static struct vdso_patch_def vdso_patches[] = { CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE, "__kernel_sync_dicache", "__kernel_sync_dicache_p5" }, +#ifdef CONFIG_PPC32 { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_gettimeofday", NULL }, { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_clock_gettime", NULL }, { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_clock_getres", NULL }, { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_get_tbfreq", NULL }, { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_time", NULL }, +#endif }; /* diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 85ba80de7133..4b19da8c87ae 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -74,9 +74,15 @@ kvm-hv-y += \ book3s_64_mmu_hv.o \ book3s_64_mmu_radix.o +kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ + book3s_hv_tm.o + kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ book3s_hv_rm_xics.o book3s_hv_rm_xive.o +kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ + book3s_hv_tm_builtin.o + ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_hmi.o \ @@ -84,6 +90,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ book3s_hv_builtin.o \ + $(kvm-book3s_64-builtin-tm-objs-y) \ $(kvm-book3s_64-builtin-xics-objs-y) endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 89707354c2ef..55c1022733c3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -741,6 +741,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, case H_SET_MODE_RESOURCE_SET_DAWR: if (!kvmppc_power8_compatible(vcpu)) return H_P2; + if (!ppc_breakpoint_available()) + return H_P2; if (mflags) return H_UNSUPPORTED_FLAG_START; if (value2 & DABRX_HYP) @@ -1206,6 +1208,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, 
struct kvm_vcpu *vcpu, r = RESUME_GUEST; } break; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case BOOK3S_INTERRUPT_HV_SOFTPATCH: + /* + * This occurs for various TM-related instructions that + * we need to emulate on POWER9 DD2.2. We have already + * handled the cases where the guest was in real-suspend + * mode and was transitioning to transactional state. + */ + r = kvmhv_p9_tm_emulation(vcpu); + break; +#endif + case BOOK3S_INTERRUPT_HV_RM_HARD: r = RESUME_PASSTHROUGH; break; @@ -1978,7 +1993,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, * turn off the HFSCR bit, which causes those instructions to trap. */ vcpu->arch.hfscr = mfspr(SPRN_HFSCR); - if (!cpu_has_feature(CPU_FTR_TM)) + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) + vcpu->arch.hfscr |= HFSCR_TM; + else if (!cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr &= ~HFSCR_TM; if (cpu_has_feature(CPU_FTR_ARCH_300)) vcpu->arch.hfscr &= ~HFSCR_MSGP; @@ -2242,6 +2259,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) tpaca = &paca[cpu]; tpaca->kvm_hstate.kvm_vcpu = vcpu; tpaca->kvm_hstate.ptid = cpu - vc->pcpu; + tpaca->kvm_hstate.fake_suspend = 0; /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */ smp_wmb(); tpaca->kvm_hstate.kvm_vcore = vc; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index f31f357b8c5a..af1772169eff 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -787,12 +787,18 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Branch around the call if both CPU_FTR_TM and + * CPU_FTR_P9_TM_HV_ASSIST are off. + */ BEGIN_FTR_SECTION + b 91f +END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ bl kvmppc_restore_tm -END_FTR_SECTION_IFSET(CPU_FTR_TM) +91: #endif /* Load guest PMU registers */ @@ -886,8 +892,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ld r6, VCPU_DAWRX(r4) ld r7, VCPU_CIABR(r4) ld r8, VCPU_TAR(r4) + /* + * Handle broken DAWR case by not writing it. This means we + * can still store the DAWR register for migration. + */ +BEGIN_FTR_SECTION mtspr SPRN_DAWR, r5 mtspr SPRN_DAWRX, r6 +END_FTR_SECTION_IFSET(CPU_FTR_DAWR) mtspr SPRN_CIABR, r7 mtspr SPRN_TAR, r8 ld r5, VCPU_IC(r4) @@ -915,11 +927,14 @@ BEGIN_FTR_SECTION mtspr SPRN_ACOP, r6 mtspr SPRN_CSIGR, r7 mtspr SPRN_TACR, r8 + nop FTR_SECTION_ELSE /* POWER9-only registers */ ld r5, VCPU_TID(r4) ld r6, VCPU_PSSCR(r4) + lbz r8, HSTATE_FAKE_SUSPEND(r13) oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */ + rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG ld r7, VCPU_HFSCR(r4) mtspr SPRN_TIDR, r5 mtspr SPRN_PSSCR, r6 @@ -1370,6 +1385,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r3, VCPU_CTR(r9) std r4, VCPU_XER(r9) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* For softpatch interrupt, go off and do TM instruction emulation */ + cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH + beq kvmppc_tm_emul +#endif + /* If this is a page table miss then see if it's theirs or ours */ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE beq kvmppc_hdsi @@ -1729,12 +1750,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) bl kvmppc_save_fp #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Branch around the call if both CPU_FTR_TM and + * CPU_FTR_P9_TM_HV_ASSIST are off. 
+ */ BEGIN_FTR_SECTION + b 91f +END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ bl kvmppc_save_tm -END_FTR_SECTION_IFSET(CPU_FTR_TM) +91: #endif /* Increment yield count if they have a VPA */ @@ -1834,6 +1861,10 @@ BEGIN_FTR_SECTION ld r6, STACK_SLOT_DAWR(r1) ld r7, STACK_SLOT_DAWRX(r1) mtspr SPRN_CIABR, r5 + /* + * If the DAWR doesn't work, it's ok to write these here as + * this value should always be zero + */ mtspr SPRN_DAWR, r6 mtspr SPRN_DAWRX, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) @@ -2054,6 +2085,42 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) mtlr r0 blr +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Softpatch interrupt for transactional memory emulation cases + * on POWER9 DD2.2. This is early in the guest exit path - we + * haven't saved registers or done a treclaim yet. + */ +kvmppc_tm_emul: + /* Save instruction image in HEIR */ + mfspr r3, SPRN_HEIR + stw r3, VCPU_HEIR(r9) + + /* + * The cases we want to handle here are those where the guest + * is in real suspend mode and is trying to transition to + * transactional mode. + */ + lbz r0, HSTATE_FAKE_SUSPEND(r13) + cmpwi r0, 0 /* keep exiting guest if in fake suspend */ + bne guest_exit_cont + rldicl r3, r11, 64 - MSR_TS_S_LG, 62 + cmpwi r3, 1 /* or if not in suspend state */ + bne guest_exit_cont + + /* Call C code to do the emulation */ + mr r3, r9 + bl kvmhv_p9_tm_emulation_early + nop + ld r9, HSTATE_KVM_VCPU(r13) + li r12, BOOK3S_INTERRUPT_HV_SOFTPATCH + cmpwi r3, 0 + beq guest_exit_cont /* continue exiting if not handled */ + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + b fast_interrupt_c_return /* go back to guest if handled */ +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + /* * Check whether an HDSI is an HPTE not found fault or something else. * If it is an HPTE not found fault that is due to the guest accessing @@ -2506,8 +2573,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r3,0 blr +2: +BEGIN_FTR_SECTION + /* POWER9 with disabled DAWR */ + li r3, H_UNSUPPORTED + blr +END_FTR_SECTION_IFCLR(CPU_FTR_DAWR) /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ -2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW + rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW rlwimi r5, r4, 2, DAWRX_WT clrrdi r4, r4, 3 std r4, VCPU_DAWR(r3) @@ -2587,13 +2660,19 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */ bl kvmppc_save_fp #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Branch around the call if both CPU_FTR_TM and + * CPU_FTR_P9_TM_HV_ASSIST are off. + */ BEGIN_FTR_SECTION + b 91f +END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ ld r9, HSTATE_KVM_VCPU(r13) bl kvmppc_save_tm -END_FTR_SECTION_IFSET(CPU_FTR_TM) +91: #endif /* @@ -2700,12 +2779,18 @@ kvm_end_cede: #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Branch around the call if both CPU_FTR_TM and + * CPU_FTR_P9_TM_HV_ASSIST are off. + */ BEGIN_FTR_SECTION + b 91f +END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ bl kvmppc_restore_tm -END_FTR_SECTION_IFSET(CPU_FTR_TM) +91: #endif /* load up FP state */ @@ -3032,6 +3117,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) kvmppc_save_tm: mflr r0 std r0, PPC_LR_STKOFF(r1) + stdu r1, -PPC_MIN_STKFRM(r1) /* Turn on TM. 
*/ mfmsr r8 @@ -3046,6 +3132,24 @@ kvmppc_save_tm: std r1, HSTATE_HOST_R1(r13) li r3, TM_CAUSE_KVM_RESCHED +BEGIN_FTR_SECTION + lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */ + cmpwi r0, 0 + beq 3f + rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */ + beq 4f +BEGIN_FTR_SECTION_NESTED(96) + bl pnv_power9_force_smt4_catch +END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) + nop + b 6f +3: + /* Emulation of the treclaim instruction needs TEXASR before treclaim */ + mfspr r6, SPRN_TEXASR + std r6, VCPU_ORIG_TEXASR(r9) +6: +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) + /* Clear the MSR RI since r1, r13 are all going to be foobar. */ li r5, 0 mtmsrd r5, 1 @@ -3057,6 +3161,43 @@ kvmppc_save_tm: SET_SCRATCH0(r13) GET_PACA(r13) std r9, PACATMSCRATCH(r13) + + /* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */ +BEGIN_FTR_SECTION + lbz r9, HSTATE_FAKE_SUSPEND(r13) + cmpwi r9, 0 + beq 2f + /* + * We were in fake suspend, so we are not going to save the + * register state as the guest checkpointed state (since + * we already have it), therefore we can now use any volatile GPR. + */ + /* Reload stack pointer and TOC. */ + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATOC(r13) + /* Set MSR RI now we have r1 and r13 back. */ + li r5, MSR_RI + mtmsrd r5, 1 + HMT_MEDIUM + ld r6, HSTATE_DSCR(r13) + mtspr SPRN_DSCR, r6 +BEGIN_FTR_SECTION_NESTED(96) + bl pnv_power9_force_smt4_release +END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) + nop + +4: + mfspr r3, SPRN_PSSCR + /* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */ + li r0, PSSCR_FAKE_SUSPEND + andc r3, r3, r0 + mtspr SPRN_PSSCR, r3 + ld r9, HSTATE_KVM_VCPU(r13) + /* Don't save TEXASR, use value from last exit in real suspend state */ + b 11f +2: +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) + ld r9, HSTATE_KVM_VCPU(r13) /* Get a few more GPRs free. */ @@ -3127,13 +3268,15 @@ kvmppc_save_tm: * change these outside of a transaction, so they must always be * context switched. */ + mfspr r7, SPRN_TEXASR + std r7, VCPU_TEXASR(r9) +11: mfspr r5, SPRN_TFHAR mfspr r6, SPRN_TFIAR - mfspr r7, SPRN_TEXASR std r5, VCPU_TFHAR(r9) std r6, VCPU_TFIAR(r9) - std r7, VCPU_TEXASR(r9) + addi r1, r1, PPC_MIN_STKFRM ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr @@ -3168,6 +3311,8 @@ kvmppc_restore_tm: mtspr SPRN_TFIAR, r6 mtspr SPRN_TEXASR, r7 + li r0, 0 + stb r0, HSTATE_FAKE_SUSPEND(r13) ld r5, VCPU_MSR(r4) rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 beqlr /* TM not active in guest */ @@ -3182,6 +3327,15 @@ kvmppc_restore_tm: mtspr SPRN_TEXASR, r7 /* + * If we are doing TM emulation for the guest on a POWER9 DD2, + * then we don't actually do a trechkpt -- we either set up + * fake-suspend mode, or emulate a TM rollback. + */ +BEGIN_FTR_SECTION + b .Ldo_tm_fake_load +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) + + /* * We need to load up the checkpointed state for the guest. * We need to do this early as it will blow away any GPRs, VSRs and * some SPRs. @@ -3253,10 +3407,24 @@ kvmppc_restore_tm: /* Set the MSR RI since we have our registers back. 
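* (MSR[RI] was cleared earlier because r1 and r13 briefly held guest checkpoint values, which would have made any interrupt in that window unrecoverable; with the stack and PACA pointers valid again it is safe to turn RI back on.)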
*/ li r5, MSR_RI mtmsrd r5, 1 - +9: ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr + +.Ldo_tm_fake_load: + cmpwi r5, 1 /* check for suspended state */ + bgt 10f + stb r5, HSTATE_FAKE_SUSPEND(r13) + b 9b /* and return */ +10: stdu r1, -PPC_MIN_STKFRM(r1) + /* guest is in transactional state, so simulate rollback */ + mr r3, r4 + bl kvmhv_emulate_tm_rollback + nop + ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */ + addi r1, r1, PPC_MIN_STKFRM + b 9b #endif /* diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c new file mode 100644 index 000000000000..bf710ad3a6d7 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -0,0 +1,216 @@ +/* + * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kvm_host.h> + +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_book3s_64.h> +#include <asm/reg.h> +#include <asm/ppc-opcode.h> + +static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause) +{ + u64 texasr, tfiar; + u64 msr = vcpu->arch.shregs.msr; + + tfiar = vcpu->arch.pc & ~0x3ull; + texasr = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT; + if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) + texasr |= TEXASR_SUSP; + if (msr & MSR_PR) { + texasr |= TEXASR_PR; + tfiar |= 1; + } + vcpu->arch.tfiar = tfiar; + /* Preserve ROT and TL fields of existing TEXASR */ + vcpu->arch.texasr = (vcpu->arch.texasr & 0x3ffffff) | texasr; +} + +/* + * This gets called on a softpatch interrupt on POWER9 DD2.2 processors. + * We expect to find a TM-related instruction to be emulated. The + * instruction image is in vcpu->arch.emul_inst. If the guest was in + * TM suspended or transactional state, the checkpointed state has been + * reclaimed and is in the vcpu struct. The CPU is in virtual mode in + * host context. + */ +int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) +{ + u32 instr = vcpu->arch.emul_inst; + u64 msr = vcpu->arch.shregs.msr; + u64 newmsr, bescr; + int ra, rs; + + switch (instr & 0xfc0007ff) { + case PPC_INST_RFID: + /* XXX do we need to check for PR=0 here? 
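+ * (For reference: MSR[TS] is a two-bit field, 0b00 non-transactional, 0b01 suspended, 0b10 transactional, matching the cmpwi-against-1 suspend tests in the assembly above. The "Sx -> T1" shorthand used below means TS going from suspended to transactional with MSR[TM] = 1 in the new MSR, which is exactly what the WARN_ON_ONCE() checks assert.)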
*/ + newmsr = vcpu->arch.shregs.srr1; + /* should only get here for Sx -> T1 transition */ + WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) && + MSR_TM_TRANSACTIONAL(newmsr) && + (newmsr & MSR_TM))); + newmsr = sanitize_msr(newmsr); + vcpu->arch.shregs.msr = newmsr; + vcpu->arch.cfar = vcpu->arch.pc - 4; + vcpu->arch.pc = vcpu->arch.shregs.srr0; + return RESUME_GUEST; + + case PPC_INST_RFEBB: + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + /* check EBB facility is available */ + if (!(vcpu->arch.hfscr & HFSCR_EBB)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) { + /* generate a facility unavailable interrupt */ + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | + ((u64)FSCR_EBB_LG << 56); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); + return RESUME_GUEST; + } + bescr = vcpu->arch.bescr; + /* expect to see a S->T transition requested */ + WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) && + ((bescr >> 30) & 3) == 2)); + bescr &= ~BESCR_GE; + if (instr & (1 << 11)) + bescr |= BESCR_GE; + vcpu->arch.bescr = bescr; + msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; + vcpu->arch.shregs.msr = msr; + vcpu->arch.cfar = vcpu->arch.pc - 4; + vcpu->arch.pc = vcpu->arch.ebbrr; + return RESUME_GUEST; + + case PPC_INST_MTMSRD: + /* XXX do we need to check for PR=0 here? */ + rs = (instr >> 21) & 0x1f; + newmsr = kvmppc_get_gpr(vcpu, rs); + /* check this is a Sx -> T1 transition */ + WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) && + MSR_TM_TRANSACTIONAL(newmsr) && + (newmsr & MSR_TM))); + /* mtmsrd doesn't change LE */ + newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE); + newmsr = sanitize_msr(newmsr); + vcpu->arch.shregs.msr = newmsr; + return RESUME_GUEST; + + case PPC_INST_TSR: + /* check for PR=1 and arch 2.06 bit set in PCR */ + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + /* check for TM disabled in the HFSCR or MSR */ + if (!(vcpu->arch.hfscr & HFSCR_TM)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + if (!(msr & MSR_TM)) { + /* generate a facility unavailable interrupt */ + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | + ((u64)FSCR_TM_LG << 56); + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_FAC_UNAVAIL); + return RESUME_GUEST; + } + /* Set CR0 to indicate previous transactional state */ + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + /* L=1 => tresume, L=0 => tsuspend */ + if (instr & (1 << 21)) { + if (MSR_TM_SUSPENDED(msr)) + msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; + } else { + if (MSR_TM_TRANSACTIONAL(msr)) + msr = (msr & ~MSR_TS_MASK) | MSR_TS_S; + } + vcpu->arch.shregs.msr = msr; + return RESUME_GUEST; + + case PPC_INST_TRECLAIM: + /* check for TM disabled in the HFSCR or MSR */ + if (!(vcpu->arch.hfscr & HFSCR_TM)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + if (!(msr & MSR_TM)) { + /* generate a facility unavailable interrupt */ + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | + ((u64)FSCR_TM_LG << 56); + 
kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_FAC_UNAVAIL); + return RESUME_GUEST; + } + /* If no transaction active, generate TM bad thing */ + if (!MSR_TM_ACTIVE(msr)) { + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); + return RESUME_GUEST; + } + /* If failure was not previously recorded, recompute TEXASR */ + if (!(vcpu->arch.orig_texasr & TEXASR_FS)) { + ra = (instr >> 16) & 0x1f; + if (ra) + ra = kvmppc_get_gpr(vcpu, ra) & 0xff; + emulate_tx_failure(vcpu, ra); + } + + copy_from_checkpoint(vcpu); + + /* Set CR0 to indicate previous transactional state */ + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + vcpu->arch.shregs.msr &= ~MSR_TS_MASK; + return RESUME_GUEST; + + case PPC_INST_TRECHKPT: + /* XXX do we need to check for PR=0 here? */ + /* check for TM disabled in the HFSCR or MSR */ + if (!(vcpu->arch.hfscr & HFSCR_TM)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + if (!(msr & MSR_TM)) { + /* generate a facility unavailable interrupt */ + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | + ((u64)FSCR_TM_LG << 56); + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_FAC_UNAVAIL); + return RESUME_GUEST; + } + /* If transaction active or TEXASR[FS] = 0, bad thing */ + if (MSR_TM_ACTIVE(msr) || !(vcpu->arch.texasr & TEXASR_FS)) { + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); + return RESUME_GUEST; + } + + copy_to_checkpoint(vcpu); + + /* Set CR0 to indicate previous transactional state */ + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + vcpu->arch.shregs.msr = msr | MSR_TS_S; + return RESUME_GUEST; + } + + /* What should we do here? We didn't recognize the instruction */ + WARN_ON_ONCE(1); + return RESUME_GUEST; +} diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c new file mode 100644 index 000000000000..d98ccfd2b88c --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c @@ -0,0 +1,109 @@ +/* + * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kvm_host.h> + +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_book3s_64.h> +#include <asm/reg.h> +#include <asm/ppc-opcode.h> + +/* + * This handles the cases where the guest is in real suspend mode + * and we want to get back to the guest without dooming the transaction. + * The caller has checked that the guest is in real-suspend mode + * (MSR[TS] = S and the fake-suspend flag is not set). + */ +int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) +{ + u32 instr = vcpu->arch.emul_inst; + u64 newmsr, msr, bescr; + int rs; + + switch (instr & 0xfc0007ff) { + case PPC_INST_RFID: + /* XXX do we need to check for PR=0 here? 
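+ * (Return convention, matching the rmhandlers caller above: 1 means the instruction was emulated here and the vcpu can be re-entered directly, 0 means give up and take the full guest exit, after which kvmhv_p9_tm_emulation() repeats the job from virtual mode.)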
*/ + newmsr = vcpu->arch.shregs.srr1; + /* should only get here for Sx -> T1 transition */ + if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM))) + return 0; + newmsr = sanitize_msr(newmsr); + vcpu->arch.shregs.msr = newmsr; + vcpu->arch.cfar = vcpu->arch.pc - 4; + vcpu->arch.pc = vcpu->arch.shregs.srr0; + return 1; + + case PPC_INST_RFEBB: + /* check for PR=1 and arch 2.06 bit set in PCR */ + msr = vcpu->arch.shregs.msr; + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) + return 0; + /* check EBB facility is available */ + if (!(vcpu->arch.hfscr & HFSCR_EBB) || + ((msr & MSR_PR) && !(mfspr(SPRN_FSCR) & FSCR_EBB))) + return 0; + bescr = mfspr(SPRN_BESCR); + /* expect to see a S->T transition requested */ + if (((bescr >> 30) & 3) != 2) + return 0; + bescr &= ~BESCR_GE; + if (instr & (1 << 11)) + bescr |= BESCR_GE; + mtspr(SPRN_BESCR, bescr); + msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; + vcpu->arch.shregs.msr = msr; + vcpu->arch.cfar = vcpu->arch.pc - 4; + vcpu->arch.pc = mfspr(SPRN_EBBRR); + return 1; + + case PPC_INST_MTMSRD: + /* XXX do we need to check for PR=0 here? */ + rs = (instr >> 21) & 0x1f; + newmsr = kvmppc_get_gpr(vcpu, rs); + msr = vcpu->arch.shregs.msr; + /* check this is a Sx -> T1 transition */ + if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM))) + return 0; + /* mtmsrd doesn't change LE */ + newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE); + newmsr = sanitize_msr(newmsr); + vcpu->arch.shregs.msr = newmsr; + return 1; + + case PPC_INST_TSR: + /* we know the MSR has the TS field = S (0b01) here */ + msr = vcpu->arch.shregs.msr; + /* check for PR=1 and arch 2.06 bit set in PCR */ + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) + return 0; + /* check for TM disabled in the HFSCR or MSR */ + if (!(vcpu->arch.hfscr & HFSCR_TM) || !(msr & MSR_TM)) + return 0; + /* L=1 => tresume => set TS to T (0b10) */ + if (instr & (1 << 21)) + vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; + /* Set CR0 to 0b0010 */ + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000; + return 1; + } + + return 0; +} + +/* + * This is called when we are returning to a guest in TM transactional + * state. We roll the guest state back to the checkpointed state. 
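+ * In concrete terms: MSR[TS] is forced to non-transactional, the PC is moved to the failure handler address recorded in TFHAR, and CR0 is set to 0b1010, the state a real transaction failure would leave for the guest's abort handler.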
+ */ +void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu) +{ + vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */ + vcpu->arch.pc = vcpu->arch.tfhar; + copy_from_checkpoint(vcpu); + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000; +} diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index f0f5cd4d2fe7..f9818d7d3381 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -188,7 +188,7 @@ static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio) if (!qpage) { pr_err("Failed to allocate queue %d for VCPU %d\n", prio, xc->server_num); - return -ENOMEM;; + return -ENOMEM; } memset(qpage, 0, 1 << xive->q_order); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 403e642c78f5..4e387647b5af 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -646,10 +646,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = hv_enabled; break; #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM case KVM_CAP_PPC_HTM: r = hv_enabled && - (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP); + (!!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) || + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)); break; +#endif default: r = 0; break; @@ -1345,7 +1348,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian) { - enum emulation_result emulated; + enum emulation_result emulated = EMULATE_DONE; while (vcpu->arch.mmio_vmx_copy_nums) { emulated = __kvmppc_handle_load(run, vcpu, rt, 8, @@ -1608,7 +1611,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_sigset_deactivate(vcpu); +#ifdef CONFIG_ALTIVEC out: +#endif vcpu_put(vcpu); return r; } diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 73697c4e3468..35f80ab7cbd8 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -153,7 +153,14 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) patch_instruction(dest + 2, instrs[2]); } - printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); + printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 70274b7b4773..34d68f1b1b40 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -280,7 +280,7 @@ static nokprobe_inline int read_mem_aligned(unsigned long *dest, * Copy from userspace to a buffer, using the largest possible * aligned accesses, up to sizeof(long). */ -static int nokprobe_inline copy_mem_in(u8 *dest, unsigned long ea, int nb, +static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) { int err = 0; @@ -385,7 +385,7 @@ static nokprobe_inline int write_mem_aligned(unsigned long val, * Copy from a buffer to userspace, using the largest possible * aligned accesses, up to sizeof(long). 
*/ -static int nokprobe_inline copy_mem_out(u8 *dest, unsigned long ea, int nb, +static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) { int err = 0; diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 849f50cd62f2..cf77d755246d 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd) mtspr(SPRN_M_TW, __pa(pgd) - offset); /* Update context */ - mtspr(SPRN_M_CASID, id); + mtspr(SPRN_M_CASID, id - 1); /* sync */ mb(); } diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 656933c85925..1d049c78c82a 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -866,18 +866,6 @@ static void native_flush_hash_range(unsigned long number, int local) local_irq_restore(flags); } -static int native_register_proc_table(unsigned long base, unsigned long page_size, - unsigned long table_size) -{ - unsigned long patb1 = base << 25; /* VSID */ - - patb1 |= (page_size << 5); /* sllp */ - patb1 |= table_size; - - partition_tb->patb1 = cpu_to_be64(patb1); - return 0; -} - void __init hpte_init_native(void) { mmu_hash_ops.hpte_invalidate = native_hpte_invalidate; @@ -889,7 +877,4 @@ void __init hpte_init_native(void) mmu_hash_ops.hpte_clear_all = native_hpte_clear; mmu_hash_ops.flush_hash_range = native_flush_hash_range; mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate; - - if (cpu_has_feature(CPU_FTR_ARCH_300)) - register_process_table = native_register_proc_table; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index cf290d415dcd..17fc13cab8dc 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -875,6 +875,12 @@ static void __init htab_initialize(void) /* Using a hypervisor which owns the htab */ htab_address = NULL; _SDR1 = 0; + /* + * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall + * to inform the hypervisor that we wish to use the HPT. 
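+ * (register_process_table() is the platform hook invoked below; on pseries it is expected to issue the H_REGISTER_PROC_TBL hcall, and passing base, page size and table size of zero registers an empty process table, i.e. declares that this LPAR stays on the HPT.)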
+ */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + register_process_table(0, 0, 0); #ifdef CONFIG_FA_DUMP /* * If firmware assisted dump is active firmware preserves @@ -1110,19 +1116,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) #ifdef CONFIG_PPC_MM_SLICES static unsigned int get_paca_psize(unsigned long addr) { - u64 lpsizes; - unsigned char *hpsizes; + unsigned char *psizes; unsigned long index, mask_index; if (addr < SLICE_LOW_TOP) { - lpsizes = get_paca()->mm_ctx_low_slices_psize; + psizes = get_paca()->mm_ctx_low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); - return (lpsizes >> (index * 4)) & 0xF; + } else { + psizes = get_paca()->mm_ctx_high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr); } - hpsizes = get_paca()->mm_ctx_high_slices_psize; - index = GET_HIGH_SLICE_INDEX(addr); mask_index = index & 0x1; - return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF; + return (psizes[index >> 1] >> (mask_index * 4)) & 0xF; } #else diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 876da2bc1796..f4153f21d214 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -553,9 +553,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); +#ifdef CONFIG_PPC_RADIX_MMU if (radix_enabled()) return radix__hugetlb_get_unmapped_area(file, addr, len, pgoff, flags); +#endif return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1); } #endif @@ -563,10 +565,12 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) { #ifdef CONFIG_PPC_MM_SLICES - unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); /* With radix we don't use slice, so derive it from vma*/ - if (!radix_enabled()) + if (!radix_enabled()) { + unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); + return 1UL << mmu_psize_to_shift(psize); + } #endif if (!is_vm_hugetlb_page(vma)) return PAGE_SIZE; diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 6419b33ca309..a2bf6965d04f 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -99,7 +99,7 @@ unsigned long __max_low_memory = MAX_LOW_MEM; /* * Check for command-line options that affect what MMU_init will do. */ -void __init MMU_setup(void) +static void __init MMU_setup(void) { /* Check for nobats option (used in mapin_ram). 
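* ("nobats" is understood to make mapin_ram() skip the BAT block-address-translation registers and map lowmem with ordinary page tables instead.)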
*/ if (strstr(boot_command_line, "nobats")) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index fe8c61149fb8..85245ef97e72 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -127,7 +127,7 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, +int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; @@ -148,7 +148,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 3f980baade4c..422be81bf69f 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -94,13 +94,6 @@ static int hash__init_new_context(struct mm_struct *mm) return index; /* - * In the case of exec, use the default limit, - * otherwise inherit it from the mm we are duplicating. - */ - if (!mm->context.slb_addr_limit) - mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; - - /* * The old code would re-promote on fork, we don't do that when using * slices as it could cause problem promoting slices that have been * forced down to 4K. @@ -115,7 +108,7 @@ static int hash__init_new_context(struct mm_struct *mm) * check against 0 is OK. */ if (mm->context.id == 0) - slice_set_user_psize(mm, mmu_virtual_psize); + slice_init_new_context_exec(mm); subpage_prot_init_new_context(mm); diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 4554d6527682..be8f5c9d4d08 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -331,6 +331,17 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) { pr_hard("initing context for mm @%p\n", mm); +#ifdef CONFIG_PPC_MM_SLICES + /* + * We have MMU_NO_CONTEXT set to be ~0. Hence check + * explicitly against context.id == 0. This ensures that we properly + * initialize context slice details for newly allocated mm's (which will + * have id == 0) and don't alter context slice inherited via fork (which + * will have id != 0). 
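+ * (This works together with the context allocator change below, which on the 8xx now hands out ids 1 to 16, with the hardware CASID programmed as id - 1 over in 8xx_mmu.c, so no live context ever has id 0.)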
+ */ + if (mm->context.id == 0) + slice_init_new_context_exec(mm); +#endif mm->context.id = MMU_NO_CONTEXT; mm->context.active = 0; return 0; @@ -428,8 +439,8 @@ void __init mmu_context_init(void) * -- BenH */ if (mmu_has_feature(MMU_FTR_TYPE_8xx)) { - first_context = 0; - last_context = 15; + first_context = 1; + last_context = 16; no_selective_tlbil = true; } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { first_context = 1; diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 422e80253a33..bd6ca74acf9e 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -155,7 +155,7 @@ void mmu_cleanup_all(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int create_section_mapping(unsigned long start, unsigned long end) +int __meminit create_section_mapping(unsigned long start, unsigned long end) { if (radix_enabled()) return radix__create_section_mapping(start, end); @@ -163,7 +163,7 @@ int create_section_mapping(unsigned long start, unsigned long end) return hash__create_section_mapping(start, end); } -int remove_section_mapping(unsigned long start, unsigned long end) +int __meminit remove_section_mapping(unsigned long start, unsigned long end) { if (radix_enabled()) return radix__remove_section_mapping(start, end); diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 2e10a964e290..ab9db0afd2c8 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -695,7 +695,7 @@ struct change_mapping_params { unsigned long aligned_end; }; -static int stop_machine_change_mapping(void *data) +static int __meminit stop_machine_change_mapping(void *data) { struct change_mapping_params *params = (struct change_mapping_params *)data; @@ -742,7 +742,7 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr, /* * clear the pte and potentially split the mapping helper */ -static void split_kernel_mapping(unsigned long addr, unsigned long end, +static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end, unsigned long size, pte_t *pte) { unsigned long mask = ~(size - 1); @@ -835,7 +835,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr, } } -static void remove_pagetable(unsigned long start, unsigned long end) +static void __meminit remove_pagetable(unsigned long start, unsigned long end) { unsigned long addr, next; pud_t *pud_base; @@ -863,12 +863,12 @@ static void remove_pagetable(unsigned long start, unsigned long end) radix__flush_tlb_kernel_range(start, end); } -int __ref radix__create_section_mapping(unsigned long start, unsigned long end) +int __meminit radix__create_section_mapping(unsigned long start, unsigned long end) { return create_physical_mapping(start, end); } -int radix__remove_section_mapping(unsigned long start, unsigned long end) +int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) { remove_pagetable(start, end); return 0; @@ -888,7 +888,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start, } #ifdef CONFIG_MEMORY_HOTPLUG -void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) +void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) { remove_pagetable(start, start + page_size); } diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 2cf5ef3fc50d..2c7c717fd2ea 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -200,10 +200,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) 5: /* 
* Handle lpsizes - * r9 is get_paca()->context.low_slices_psize, r11 is index + * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index */ - ld r9,PACALOWSLICESPSIZE(r13) - mr r11,r10 + srdi r11,r10,1 /* index */ + addi r9,r11,PACALOWSLICESPSIZE + lbzx r9,r13,r9 /* r9 is lpsizes[r11] */ + rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */ 6: sldi r11,r11,2 /* index * 4 */ /* Extract the psize and multiply to get an array offset */ diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 23ec2c5e3b78..1297b3ad7dd2 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -37,32 +37,25 @@ #include <asm/hugetlb.h> static DEFINE_SPINLOCK(slice_convert_lock); -/* - * One bit per slice. We have lower slices which cover 256MB segments - * upto 4G range. That gets us 16 low slices. For the rest we track slices - * in 1TB size. - */ -struct slice_mask { - u64 low_slices; - DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); -}; #ifdef DEBUG int _slice_debug = 1; -static void slice_print_mask(const char *label, struct slice_mask mask) +static void slice_print_mask(const char *label, const struct slice_mask *mask) { if (!_slice_debug) return; - pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices); - pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices); + pr_devel("%s low_slice: %*pbl\n", label, + (int)SLICE_NUM_LOW, &mask->low_slices); + pr_devel("%s high_slice: %*pbl\n", label, + (int)SLICE_NUM_HIGH, mask->high_slices); } #define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0) #else -static void slice_print_mask(const char *label, struct slice_mask mask) {} +static void slice_print_mask(const char *label, const struct slice_mask *mask) {} #define slice_dbg(fmt...) 
#endif @@ -73,10 +66,12 @@ static void slice_range_to_mask(unsigned long start, unsigned long len, unsigned long end = start + len - 1; ret->low_slices = 0; - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); + if (SLICE_NUM_HIGH) + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); if (start < SLICE_LOW_TOP) { - unsigned long mend = min(end, (SLICE_LOW_TOP - 1)); + unsigned long mend = min(end, + (unsigned long)(SLICE_LOW_TOP - 1)); ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) - (1u << GET_LOW_SLICE_INDEX(start)); @@ -113,11 +108,13 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) unsigned long start = slice << SLICE_HIGH_SHIFT; unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); +#ifdef CONFIG_PPC64 /* Hack, so that each addresses is controlled by exactly one * of the high or low area bitmaps, the first high area starts * at 4GB, not 0 */ if (start == 0) start = SLICE_LOW_TOP; +#endif return !slice_area_is_free(mm, start, end - start); } @@ -128,7 +125,8 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret, unsigned long i; ret->low_slices = 0; - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); + if (SLICE_NUM_HIGH) + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); for (i = 0; i < SLICE_NUM_LOW; i++) if (!slice_low_has_vma(mm, i)) @@ -142,53 +140,75 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret, __set_bit(i, ret->high_slices); } -static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret, - unsigned long high_limit) +#ifdef CONFIG_PPC_BOOK3S_64 +static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) { - unsigned char *hpsizes; - int index, mask_index; - unsigned long i; - u64 lpsizes; - - ret->low_slices = 0; - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); +#ifdef CONFIG_PPC_64K_PAGES + if (psize == MMU_PAGE_64K) + return &mm->context.mask_64k; +#endif + if (psize == MMU_PAGE_4K) + return &mm->context.mask_4k; +#ifdef CONFIG_HUGETLB_PAGE + if (psize == MMU_PAGE_16M) + return &mm->context.mask_16m; + if (psize == MMU_PAGE_16G) + return &mm->context.mask_16g; +#endif + BUG(); +} +#elif defined(CONFIG_PPC_8xx) +static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) +{ + if (psize == mmu_virtual_psize) + return &mm->context.mask_base_psize; +#ifdef CONFIG_HUGETLB_PAGE + if (psize == MMU_PAGE_512K) + return &mm->context.mask_512k; + if (psize == MMU_PAGE_8M) + return &mm->context.mask_8m; +#endif + BUG(); +} +#else +#error "Must define the slice masks for page sizes supported by the platform" +#endif - lpsizes = mm->context.low_slices_psize; - for (i = 0; i < SLICE_NUM_LOW; i++) - if (((lpsizes >> (i * 4)) & 0xf) == psize) - ret->low_slices |= 1u << i; +static bool slice_check_range_fits(struct mm_struct *mm, + const struct slice_mask *available, + unsigned long start, unsigned long len) +{ + unsigned long end = start + len - 1; + u64 low_slices = 0; - if (high_limit <= SLICE_LOW_TOP) - return; + if (start < SLICE_LOW_TOP) { + unsigned long mend = min(end, + (unsigned long)(SLICE_LOW_TOP - 1)); - hpsizes = mm->context.high_slices_psize; - for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) { - mask_index = i & 0x1; - index = i >> 1; - if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize) - __set_bit(i, ret->high_slices); + low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) + - (1u << GET_LOW_SLICE_INDEX(start)); } -} + if ((low_slices & available->low_slices) != low_slices) + return false; -static int 
slice_check_fit(struct mm_struct *mm, - struct slice_mask mask, struct slice_mask available) -{ - DECLARE_BITMAP(result, SLICE_NUM_HIGH); - /* - * Make sure we just do bit compare only to the max - * addr limit and not the full bit map size. - */ - unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); + if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) { + unsigned long start_index = GET_HIGH_SLICE_INDEX(start); + unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); + unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; + unsigned long i; - bitmap_and(result, mask.high_slices, - available.high_slices, slice_count); + for (i = start_index; i < start_index + count; i++) { + if (!test_bit(i, available->high_slices)) + return false; + } + } - return (mask.low_slices & available.low_slices) == mask.low_slices && - bitmap_equal(result, mask.high_slices, slice_count); + return true; } static void slice_flush_segments(void *parm) { +#ifdef CONFIG_PPC64 struct mm_struct *mm = parm; unsigned long flags; @@ -200,40 +220,64 @@ static void slice_flush_segments(void *parm) local_irq_save(flags); slb_flush_and_rebolt(); local_irq_restore(flags); +#endif } -static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize) +static void slice_convert(struct mm_struct *mm, + const struct slice_mask *mask, int psize) { int index, mask_index; /* Write the new slice psize bits */ - unsigned char *hpsizes; - u64 lpsizes; + unsigned char *hpsizes, *lpsizes; + struct slice_mask *psize_mask, *old_mask; unsigned long i, flags; + int old_psize; slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); slice_print_mask(" mask", mask); + psize_mask = slice_mask_for_size(mm, psize); + /* We need to use a spinlock here to protect against * concurrent 64k -> 4k demotion ... */ spin_lock_irqsave(&slice_convert_lock, flags); lpsizes = mm->context.low_slices_psize; - for (i = 0; i < SLICE_NUM_LOW; i++) - if (mask.low_slices & (1u << i)) - lpsizes = (lpsizes & ~(0xful << (i * 4))) | - (((unsigned long)psize) << (i * 4)); + for (i = 0; i < SLICE_NUM_LOW; i++) { + if (!(mask->low_slices & (1u << i))) + continue; - /* Assign the value back */ - mm->context.low_slices_psize = lpsizes; + mask_index = i & 0x1; + index = i >> 1; + + /* Update the slice_mask */ + old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf; + old_mask = slice_mask_for_size(mm, old_psize); + old_mask->low_slices &= ~(1u << i); + psize_mask->low_slices |= 1u << i; + + /* Update the sizes array */ + lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) | + (((unsigned long)psize) << (mask_index * 4)); + } hpsizes = mm->context.high_slices_psize; for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) { + if (!test_bit(i, mask->high_slices)) + continue; + mask_index = i & 0x1; index = i >> 1; - if (test_bit(i, mask.high_slices)) - hpsizes[index] = (hpsizes[index] & - ~(0xf << (mask_index * 4))) | + + /* Update the slice_mask */ + old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf; + old_mask = slice_mask_for_size(mm, old_psize); + __clear_bit(i, old_mask->high_slices); + __set_bit(i, psize_mask->high_slices); + + /* Update the sizes array */ + hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) | (((unsigned long)psize) << (mask_index * 4)); } @@ -254,26 +298,25 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz * 'available' slice_mark. 
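* (As read from the callers further down: the helper reports whether the slice containing addr is set in *available and returns, via boundary_addr, the address at which that slice ends in the scan direction, end = 1 for the bottom-up walk and end = 0 for the top-down one.)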
*/ static bool slice_scan_available(unsigned long addr, - struct slice_mask available, - int end, - unsigned long *boundary_addr) + const struct slice_mask *available, + int end, unsigned long *boundary_addr) { unsigned long slice; if (addr < SLICE_LOW_TOP) { slice = GET_LOW_SLICE_INDEX(addr); *boundary_addr = (slice + end) << SLICE_LOW_SHIFT; - return !!(available.low_slices & (1u << slice)); + return !!(available->low_slices & (1u << slice)); } else { slice = GET_HIGH_SLICE_INDEX(addr); *boundary_addr = (slice + end) ? ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP; - return !!test_bit(slice, available.high_slices); + return !!test_bit(slice, available->high_slices); } } static unsigned long slice_find_area_bottomup(struct mm_struct *mm, unsigned long len, - struct slice_mask available, + const struct slice_mask *available, int psize, unsigned long high_limit) { int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); @@ -319,7 +362,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm, static unsigned long slice_find_area_topdown(struct mm_struct *mm, unsigned long len, - struct slice_mask available, + const struct slice_mask *available, int psize, unsigned long high_limit) { int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); @@ -377,7 +420,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, - struct slice_mask mask, int psize, + const struct slice_mask *mask, int psize, int topdown, unsigned long high_limit) { if (topdown) @@ -386,23 +429,33 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, return slice_find_area_bottomup(mm, len, mask, psize, high_limit); } -static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src) +static inline void slice_copy_mask(struct slice_mask *dst, + const struct slice_mask *src) { - DECLARE_BITMAP(result, SLICE_NUM_HIGH); - - dst->low_slices |= src->low_slices; - bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH); + dst->low_slices = src->low_slices; + if (!SLICE_NUM_HIGH) + return; + bitmap_copy(dst->high_slices, src->high_slices, SLICE_NUM_HIGH); } -static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src) +static inline void slice_or_mask(struct slice_mask *dst, + const struct slice_mask *src1, + const struct slice_mask *src2) { - DECLARE_BITMAP(result, SLICE_NUM_HIGH); - - dst->low_slices &= ~src->low_slices; + dst->low_slices = src1->low_slices | src2->low_slices; + if (!SLICE_NUM_HIGH) + return; + bitmap_or(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH); +} - bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH); +static inline void slice_andnot_mask(struct slice_mask *dst, + const struct slice_mask *src1, + const struct slice_mask *src2) +{ + dst->low_slices = src1->low_slices & ~src2->low_slices; + if (!SLICE_NUM_HIGH) + return; + bitmap_andnot(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH); } #ifdef CONFIG_PPC_64K_PAGES @@ -415,10 +468,10 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, unsigned long flags, unsigned int psize, int topdown) { - struct slice_mask mask; struct slice_mask good_mask; struct slice_mask potential_mask; - struct slice_mask compat_mask; + const struct slice_mask *maskp; + 
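/* maskp and, for 64k-with-4k combo pages, compat_maskp point at the mm's + * cached per-size masks from slice_mask_for_size(); good_mask and + * potential_mask stay local scratch copies built from them. */ + 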
const struct slice_mask *compat_maskp = NULL; int fixed = (flags & MAP_FIXED); int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); unsigned long page_size = 1UL << pshift; @@ -442,23 +495,16 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, } if (high_limit > mm->context.slb_addr_limit) { + /* + * Increasing the slb_addr_limit does not require + * slice mask cache to be recalculated because it should + * be already initialised beyond the old address limit. + */ mm->context.slb_addr_limit = high_limit; + on_each_cpu(slice_flush_segments, mm, 1); } - /* - * init different masks - */ - mask.low_slices = 0; - bitmap_zero(mask.high_slices, SLICE_NUM_HIGH); - - /* silence stupid warning */; - potential_mask.low_slices = 0; - bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH); - - compat_mask.low_slices = 0; - bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH); - /* Sanity checks */ BUG_ON(mm->task_size == 0); BUG_ON(mm->context.slb_addr_limit == 0); @@ -481,8 +527,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* First make up a "good" mask of slices that have the right size * already */ - slice_mask_for_size(mm, psize, &good_mask, high_limit); - slice_print_mask(" good_mask", good_mask); + maskp = slice_mask_for_size(mm, psize); /* * Here "good" means slices that are already the right page size, @@ -503,25 +548,31 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * search in good | compat | free, found => convert free. */ -#ifdef CONFIG_PPC_64K_PAGES - /* If we support combo pages, we can allow 64k pages in 4k slices */ - if (psize == MMU_PAGE_64K) { - slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit); + /* + * If we support combo pages, we can allow 64k pages in 4k slices + * The mask copies could be avoided in most cases here if we had + * a pointer to good mask for the next code to use. + */ + if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) { + compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); if (fixed) - slice_or_mask(&good_mask, &compat_mask); + slice_or_mask(&good_mask, maskp, compat_maskp); + else + slice_copy_mask(&good_mask, maskp); + } else { + slice_copy_mask(&good_mask, maskp); } -#endif + + slice_print_mask(" good_mask", &good_mask); + if (compat_maskp) + slice_print_mask(" compat_mask", compat_maskp); /* First check hint if it's valid or if we have MAP_FIXED */ if (addr != 0 || fixed) { - /* Build a mask for the requested range */ - slice_range_to_mask(addr, len, &mask); - slice_print_mask(" mask", mask); - /* Check if we fit in the good mask. 
If we do, we just return, * nothing else to do */ - if (slice_check_fit(mm, mask, good_mask)) { + if (slice_check_range_fits(mm, &good_mask, addr, len)) { slice_dbg(" fits good !\n"); return addr; } @@ -529,7 +580,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* Now let's see if we can find something in the existing * slices for that size */ - newaddr = slice_find_area(mm, len, good_mask, + newaddr = slice_find_area(mm, len, &good_mask, psize, topdown, high_limit); if (newaddr != -ENOMEM) { /* Found within the good mask, we don't have to setup, @@ -544,12 +595,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * empty and thus can be converted */ slice_mask_for_free(mm, &potential_mask, high_limit); - slice_or_mask(&potential_mask, &good_mask); - slice_print_mask(" potential", potential_mask); + slice_or_mask(&potential_mask, &potential_mask, &good_mask); + slice_print_mask(" potential", &potential_mask); - if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) { - slice_dbg(" fits potential !\n"); - goto convert; + if (addr != 0 || fixed) { + if (slice_check_range_fits(mm, &potential_mask, addr, len)) { + slice_dbg(" fits potential !\n"); + goto convert; + } } /* If we have MAP_FIXED and failed the above steps, then error out */ @@ -562,7 +615,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * anywhere in the good area. */ if (addr) { - addr = slice_find_area(mm, len, good_mask, + addr = slice_find_area(mm, len, &good_mask, psize, topdown, high_limit); if (addr != -ENOMEM) { slice_dbg(" found area at 0x%lx\n", addr); @@ -573,14 +626,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* Now let's see if we can find something in the existing slices * for that size plus free slices */ - addr = slice_find_area(mm, len, potential_mask, + addr = slice_find_area(mm, len, &potential_mask, psize, topdown, high_limit); #ifdef CONFIG_PPC_64K_PAGES if (addr == -ENOMEM && psize == MMU_PAGE_64K) { /* retry the search with 4k-page slices included */ - slice_or_mask(&potential_mask, &compat_mask); - addr = slice_find_area(mm, len, potential_mask, + slice_or_mask(&potential_mask, &potential_mask, compat_maskp); + addr = slice_find_area(mm, len, &potential_mask, psize, topdown, high_limit); } #endif @@ -588,15 +641,18 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, if (addr == -ENOMEM) return -ENOMEM; - slice_range_to_mask(addr, len, &mask); + slice_range_to_mask(addr, len, &potential_mask); slice_dbg(" found potential area at 0x%lx\n", addr); - slice_print_mask(" mask", mask); + slice_print_mask(" mask", &potential_mask); convert: - slice_andnot_mask(&mask, &good_mask); - slice_andnot_mask(&mask, &compat_mask); - if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) { - slice_convert(mm, mask, psize); + slice_andnot_mask(&potential_mask, &potential_mask, &good_mask); + if (compat_maskp && !fixed) + slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp); + if (potential_mask.low_slices || + (SLICE_NUM_HIGH && + !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) { + slice_convert(mm, &potential_mask, psize); if (psize > MMU_PAGE_BASE) on_each_cpu(slice_flush_segments, mm, 1); } @@ -627,94 +683,60 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) { - unsigned char *hpsizes; + unsigned char *psizes; int 
index, mask_index; - /* - * Radix doesn't use slice, but can get enabled along with MMU_SLICE - */ - if (radix_enabled()) { -#ifdef CONFIG_PPC_64K_PAGES - return MMU_PAGE_64K; -#else - return MMU_PAGE_4K; -#endif - } + VM_BUG_ON(radix_enabled()); + if (addr < SLICE_LOW_TOP) { - u64 lpsizes; - lpsizes = mm->context.low_slices_psize; + psizes = mm->context.low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); - return (lpsizes >> (index * 4)) & 0xf; + } else { + psizes = mm->context.high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr); } - hpsizes = mm->context.high_slices_psize; - index = GET_HIGH_SLICE_INDEX(addr); mask_index = index & 0x1; - return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf; + return (psizes[index >> 1] >> (mask_index * 4)) & 0xf; } EXPORT_SYMBOL_GPL(get_slice_psize); -/* - * This is called by hash_page when it needs to do a lazy conversion of - * an address space from real 64K pages to combo 4K pages (typically - * when hitting a non cacheable mapping on a processor or hypervisor - * that won't allow them for 64K pages). - * - * This is also called in init_new_context() to change back the user - * psize from whatever the parent context had it set to - * N.B. This may be called before mm->context.id has been set. - * - * This function will only change the content of the {low,high)_slice_psize - * masks, it will not flush SLBs as this shall be handled lazily by the - * caller. - */ -void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) +void slice_init_new_context_exec(struct mm_struct *mm) { - int index, mask_index; - unsigned char *hpsizes; - unsigned long flags, lpsizes; - unsigned int old_psize; - int i; + unsigned char *hpsizes, *lpsizes; + struct slice_mask *mask; + unsigned int psize = mmu_virtual_psize; - slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize); - - VM_BUG_ON(radix_enabled()); - spin_lock_irqsave(&slice_convert_lock, flags); + slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm); - old_psize = mm->context.user_psize; - slice_dbg(" old_psize=%d\n", old_psize); - if (old_psize == psize) - goto bail; + /* + * In the case of exec, use the default limit. In the + * case of fork it is just inherited from the mm being + * duplicated. + */ +#ifdef CONFIG_PPC64 + mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; +#else + mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW; +#endif mm->context.user_psize = psize; - wmb(); + /* + * Set all slice psizes to the default. + */ lpsizes = mm->context.low_slices_psize; - for (i = 0; i < SLICE_NUM_LOW; i++) - if (((lpsizes >> (i * 4)) & 0xf) == old_psize) - lpsizes = (lpsizes & ~(0xful << (i * 4))) | - (((unsigned long)psize) << (i * 4)); - /* Assign the value back */ - mm->context.low_slices_psize = lpsizes; + memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1); hpsizes = mm->context.high_slices_psize; - for (i = 0; i < SLICE_NUM_HIGH; i++) { - mask_index = i & 0x1; - index = i >> 1; - if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize) - hpsizes[index] = (hpsizes[index] & - ~(0xf << (mask_index * 4))) | - (((unsigned long)psize) << (mask_index * 4)); - } + memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1); - - - - slice_dbg(" lsps=%lx, hsps=%lx\n", - (unsigned long)mm->context.low_slices_psize, - (unsigned long)mm->context.high_slices_psize); - - bail: - spin_unlock_irqrestore(&slice_convert_lock, flags); + /* + * Slice mask cache starts zeroed, fill the default size cache. 
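+ * (Each mm now keeps one precomputed slice_mask per supported page size, handed out by slice_mask_for_size() and kept up to date by slice_convert(); at exec time only the default-psize mask has to be filled, covering every low and high slice.)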
+ */ + mask = slice_mask_for_size(mm, psize); + mask->low_slices = ~0UL; + if (SLICE_NUM_HIGH) + bitmap_fill(mask->high_slices, SLICE_NUM_HIGH); } void slice_set_range_psize(struct mm_struct *mm, unsigned long start, @@ -725,7 +747,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start, VM_BUG_ON(radix_enabled()); slice_range_to_mask(start, len, &mask); - slice_convert(mm, mask, psize); + slice_convert(mm, &mask, psize); } #ifdef CONFIG_HUGETLB_PAGE @@ -748,33 +770,27 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start, * for now as we only use slices with hugetlbfs enabled. This should * be fixed as the generic code gets fixed. */ -int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, +int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { - struct slice_mask mask, available; + const struct slice_mask *maskp; unsigned int psize = mm->context.user_psize; - unsigned long high_limit = mm->context.slb_addr_limit; - if (radix_enabled()) - return 0; + VM_BUG_ON(radix_enabled()); - slice_range_to_mask(addr, len, &mask); - slice_mask_for_size(mm, psize, &available, high_limit); + maskp = slice_mask_for_size(mm, psize); #ifdef CONFIG_PPC_64K_PAGES /* We need to account for 4k slices too */ if (psize == MMU_PAGE_64K) { - struct slice_mask compat_mask; - slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit); - slice_or_mask(&available, &compat_mask); + const struct slice_mask *compat_maskp; + struct slice_mask available; + + compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); + slice_or_mask(&available, maskp, compat_maskp); + return !slice_check_range_fits(mm, &available, addr, len); } #endif -#if 0 /* too verbose */ - slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n", - mm, addr, len); - slice_print_mask(" mask", mask); - slice_print_mask(" available", available); -#endif - return !slice_check_fit(mm, mask, available); + return !slice_check_range_fits(mm, maskp, addr, len); } #endif diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index a07f5372a4bf..291eab4ed7cd 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -98,7 +98,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set, rb |= set << PPC_BITLSHIFT(51); rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ - r = 1; /* raidx format */ + r = 1; /* radix format */ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); @@ -112,7 +112,7 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric) rb = PPC_BIT(53); /* IS = 1 */ rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ - r = 1; /* raidx format */ + r = 1; /* radix format */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); @@ -128,7 +128,7 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid, rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ - r = 1; /* raidx format */ + r = 1; /* radix format */ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); @@ -144,7 +144,7 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid, rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ - r = 1; /* raidx format */ + r = 1; /* radix format */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), 
"i"(ric), "r"(rs) : "memory"); @@ -668,7 +668,7 @@ void radix__flush_tlb_all(void) rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */ prs = 0; /* partition scoped */ - r = 1; /* raidx format */ + r = 1; /* radix format */ rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */ asm volatile("ptesync": : :"memory"); diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 44d67b167e0b..2668cc414e4e 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -208,7 +208,7 @@ prepare_cached_spu_info(struct spu *spu, unsigned long objectId) /* Create cached_info and set spu_info[spu->number] to point to it. * spu->number is a system-wide value, not a per-node value. */ - info = kzalloc(sizeof(struct cached_info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { printk(KERN_ERR "SPU_PROF: " "%s, line %d: create vma_map failed\n", diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c index c579b16845da..f40e37316dd6 100644 --- a/arch/powerpc/oprofile/cell/vma_map.c +++ b/arch/powerpc/oprofile/cell/vma_map.c @@ -69,8 +69,8 @@ vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma, unsigned int size, unsigned int offset, unsigned int guard_ptr, unsigned int guard_val) { - struct vma_to_fileoffset_map *new = - kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL); + struct vma_to_fileoffset_map *new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) { printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n", __func__, __LINE__); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index f89bbd54ecec..e032aeff3d6b 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -198,6 +198,10 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid) *addrp = mfspr(SPRN_SDAR); + + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && + is_kernel_addr(mfspr(SPRN_SDAR))) + *addrp = 0; } static bool regs_sihv(struct pt_regs *regs) @@ -457,6 +461,16 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) /* invalid entry */ continue; + /* + * BHRB rolling buffer could very much contain the kernel + * addresses at this point. Check the privileges before + * exporting it to userspace (avoid exposure of regions + * where we could have speculative execution) + */ + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && + is_kernel_addr(addr)) + continue; + /* Branches are read most recent first (ie. mfbhrb 0 is * the most recent branch). * There are two types of valid entries: @@ -1226,6 +1240,7 @@ static void power_pmu_disable(struct pmu *pmu) */ write_mmcr0(cpuhw, val); mb(); + isync(); /* * Disable instruction sampling if it was enabled @@ -1234,12 +1249,26 @@ static void power_pmu_disable(struct pmu *pmu) mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mb(); + isync(); } cpuhw->disabled = 1; cpuhw->n_added = 0; ebb_switch_out(mmcr0); + +#ifdef CONFIG_PPC64 + /* + * These are readable by userspace, may contain kernel + * addresses and are not switched by context switch, so clear + * them now to avoid leaking anything to userspace in general + * including to another process. 
+ */ + if (ppmu->flags & PPMU_ARCH_207S) { + mtspr(SPRN_SDAR, 0); + mtspr(SPRN_SIAR, 0); + } +#endif } local_irq_restore(flags); @@ -1810,6 +1839,18 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } +static bool is_event_blacklisted(u64 ev) +{ + int i; + + for (i = 0; i < ppmu->n_blacklist_ev; i++) { + if (ppmu->blacklist_ev[i] == ev) + return true; + } + + return false; +} + static int power_pmu_event_init(struct perf_event *event) { u64 ev; @@ -1835,15 +1876,24 @@ static int power_pmu_event_init(struct perf_event *event) ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return -EOPNOTSUPP; + + if (ppmu->blacklist_ev && is_event_blacklisted(ev)) + return -EINVAL; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) return err; + + if (ppmu->blacklist_ev && is_event_blacklisted(ev)) + return -EINVAL; break; case PERF_TYPE_RAW: ev = event->attr.config; + + if (ppmu->blacklist_ev && is_event_blacklisted(ev)) + return -EINVAL; break; default: return -ENOENT; diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h index e99c6bf4d391..7de344b7d9cc 100644 --- a/arch/powerpc/perf/power9-events-list.h +++ b/arch/powerpc/perf/power9-events-list.h @@ -69,3 +69,31 @@ EVENT(PM_BR_CMPL_ALT, 0x10012) EVENT(PM_BR_2PATH, 0x20036) /* Alternate branch events that are not strongly biased */ EVENT(PM_BR_2PATH_ALT, 0x40036) + +/* Blacklisted events */ +EVENT(PM_MRK_ST_DONE_L2, 0x10134) +EVENT(PM_RADIX_PWC_L1_HIT, 0x1f056) +EVENT(PM_FLOP_CMPL, 0x100f4) +EVENT(PM_MRK_NTF_FIN, 0x20112) +EVENT(PM_RADIX_PWC_L2_HIT, 0x2d024) +EVENT(PM_IFETCH_THROTTLE, 0x3405e) +EVENT(PM_MRK_L2_TM_ST_ABORT_SISTER, 0x3e15c) +EVENT(PM_RADIX_PWC_L3_HIT, 0x3f056) +EVENT(PM_RUN_CYC_SMT2_MODE, 0x3006c) +EVENT(PM_TM_TX_PASS_RUN_INST, 0x4e014) +EVENT(PM_DISP_HELD_SYNC_HOLD, 0x4003c) +EVENT(PM_DTLB_MISS_16G, 0x1c058) +EVENT(PM_DERAT_MISS_2M, 0x1c05a) +EVENT(PM_DTLB_MISS_2M, 0x1c05c) +EVENT(PM_MRK_DTLB_MISS_1G, 0x1d15c) +EVENT(PM_DTLB_MISS_4K, 0x2c056) +EVENT(PM_DERAT_MISS_1G, 0x2c05a) +EVENT(PM_MRK_DERAT_MISS_2M, 0x2d152) +EVENT(PM_MRK_DTLB_MISS_4K, 0x2d156) +EVENT(PM_MRK_DTLB_MISS_16G, 0x2d15e) +EVENT(PM_DTLB_MISS_64K, 0x3c056) +EVENT(PM_MRK_DERAT_MISS_1G, 0x3d152) +EVENT(PM_MRK_DTLB_MISS_64K, 0x3d156) +EVENT(PM_DTLB_MISS_16M, 0x4c056) +EVENT(PM_DTLB_MISS_1G, 0x4c05a) +EVENT(PM_MRK_DTLB_MISS_16M, 0x4c15e) diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 24b5b5b7a206..2ca0b33b4efb 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -101,9 +101,45 @@ enum { #define POWER9_MMCRA_IFM2 0x0000000080000000UL #define POWER9_MMCRA_IFM3 0x00000000C0000000UL +/* Nasty Power9 specific hack */ +#define PVR_POWER9_CUMULUS 0x00002000 + /* PowerISA v2.07 format attribute structure */ extern struct attribute_group isa207_pmu_format_group; +int p9_dd21_bl_ev[] = { + PM_MRK_ST_DONE_L2, + PM_RADIX_PWC_L1_HIT, + PM_FLOP_CMPL, + PM_MRK_NTF_FIN, + PM_RADIX_PWC_L2_HIT, + PM_IFETCH_THROTTLE, + PM_MRK_L2_TM_ST_ABORT_SISTER, + PM_RADIX_PWC_L3_HIT, + PM_RUN_CYC_SMT2_MODE, + PM_TM_TX_PASS_RUN_INST, + PM_DISP_HELD_SYNC_HOLD, +}; + +int p9_dd22_bl_ev[] = { + PM_DTLB_MISS_16G, + PM_DERAT_MISS_2M, + PM_DTLB_MISS_2M, + PM_MRK_DTLB_MISS_1G, + PM_DTLB_MISS_4K, + PM_DERAT_MISS_1G, + PM_MRK_DERAT_MISS_2M, + PM_MRK_DTLB_MISS_4K, + PM_MRK_DTLB_MISS_16G, + PM_DTLB_MISS_64K, + PM_MRK_DERAT_MISS_1G, + PM_MRK_DTLB_MISS_64K, + PM_DISP_HELD_SYNC_HOLD, +
PM_DTLB_MISS_16M, + PM_DTLB_MISS_1G, + PM_MRK_DTLB_MISS_16M, +}; + /* Table of alternatives, sorted by column 0 */ static const unsigned int power9_event_alternatives[][MAX_ALT] = { { PM_INST_DISP, PM_INST_DISP_ALT }, @@ -446,12 +482,24 @@ static struct power_pmu power9_pmu = { static int __init init_power9_pmu(void) { int rc = 0; + unsigned int pvr = mfspr(SPRN_PVR); /* Comes from cpu_specs[] */ if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9")) return -ENODEV; + /* Blacklist events */ + if (!(pvr & PVR_POWER9_CUMULUS)) { + if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 1)) { + power9_pmu.blacklist_ev = p9_dd21_bl_ev; + power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd21_bl_ev); + } else if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 2)) { + power9_pmu.blacklist_ev = p9_dd22_bl_ev; + power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd22_bl_ev); + } + } + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { /* * Since PM_INST_CMPL may not provide right counts in all diff --git a/arch/powerpc/platforms/4xx/msi.c b/arch/powerpc/platforms/4xx/msi.c index d50417e23add..4b859c840ea9 100644 --- a/arch/powerpc/platforms/4xx/msi.c +++ b/arch/powerpc/platforms/4xx/msi.c @@ -223,7 +223,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev) dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n"); - msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL); + msi = kzalloc(sizeof(*msi), GFP_KERNEL); if (!msi) { dev_err(&dev->dev, "No memory for MSI structure\n"); return -ENOMEM; diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c index 85d9e37f5ccb..69d9f60d9fe5 100644 --- a/arch/powerpc/platforms/4xx/ocm.c +++ b/arch/powerpc/platforms/4xx/ocm.c @@ -339,7 +339,7 @@ void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align, if (IS_ERR_VALUE(offset)) continue; - ocm_blk = kzalloc(sizeof(struct ocm_block), GFP_KERNEL); + ocm_blk = kzalloc(sizeof(*ocm_blk), GFP_KERNEL); if (!ocm_blk) { printk(KERN_ERR "PPC4XX OCM: could not allocate ocm block"); rh_free(ocm_reg->rh, offset); diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index e1274db53d48..2188d691a40f 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -217,13 +217,7 @@ void __noreturn mpc8xx_restart(char *cmd) static void cpm_cascade(struct irq_desc *desc) { - struct irq_chip *chip = irq_desc_get_chip(desc); - int cascade_irq = cpm_get_irq(); - - if (cascade_irq >= 0) - generic_handle_irq(cascade_irq); - - chip->irq_eoi(&desc->irq_data); + generic_handle_irq(cpm_get_irq()); } /* Initialize the internal interrupt controllers. 
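init_power9_pmu() above selects a blacklist by decoding the PVR: Cumulus parts are skipped, while Nimbus DD2.1 and DD2.2 each get their own table. A small sketch of that decode; the field layout restates PVR_CFG()/PVR_MIN() from asm/reg.h as an assumption, and the sample PVR values are illustrative:

#include <stdint.h>
#include <stdio.h>

#define PVR_CFG(pvr) (((pvr) >> 8) & 0xf) /* assumed: DD major in bits 11..8 */
#define PVR_MIN(pvr) ((pvr) & 0xff)       /* assumed: DD minor in bits 7..0 */
#define PVR_POWER9_CUMULUS 0x00002000     /* from the hunk above */

static const char *p9_blacklist_for(uint32_t pvr)
{
	if (pvr & PVR_POWER9_CUMULUS)
		return "none (Cumulus)";
	if (PVR_CFG(pvr) == 2 && PVR_MIN(pvr) == 1)
		return "p9_dd21_bl_ev";
	if (PVR_CFG(pvr) == 2 && PVR_MIN(pvr) == 2)
		return "p9_dd22_bl_ev";
	return "none";
}

int main(void)
{
	printf("%s\n", p9_blacklist_for(0x004e0201)); /* would pick the DD2.1 table */
	printf("%s\n", p9_blacklist_for(0x004e0202)); /* would pick the DD2.2 table */
	return 0;
}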
The number of diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index a429d859f15d..5a8b1bf1e819 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -326,6 +326,7 @@ config PPC_BOOK3E_MMU config PPC_MM_SLICES bool default y if PPC_BOOK3S_64 + default y if PPC_8xx && HUGETLB_PAGE default n config PPC_HAVE_PMU_SUPPORT diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 6ea3f248b155..326d34e2aa02 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -342,7 +342,7 @@ static int axon_msi_probe(struct platform_device *device) pr_devel("axon_msi: setting up dn %pOF\n", dn); - msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL); + msic = kzalloc(sizeof(*msic), GFP_KERNEL); if (!msic) { printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n", dn); diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c index d1e61e273e64..1200d0dea512 100644 --- a/arch/powerpc/platforms/cell/spider-pci.c +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -133,7 +133,7 @@ int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data) pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n", np); - priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) { pr_err("SPIDERPCI-IOWA:" "Can't allocate struct spiderpci_iowa_private"); diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c index b847e9403566..d9de848dae47 100644 --- a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c +++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c @@ -36,7 +36,7 @@ int spu_alloc_lscsa(struct spu_state *csa) struct spu_lscsa *lscsa; unsigned char *p; - lscsa = vzalloc(sizeof(struct spu_lscsa)); + lscsa = vzalloc(sizeof(*lscsa)); if (!lscsa) return -ENOMEM; csa->lscsa = lscsa; diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c index ade83829d5e8..7206f3f573d4 100644 --- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -132,7 +132,7 @@ static void __flipper_quiesce(void __iomem *io_base) out_be32(io_base + FLIPPER_ICR, 0xffffffff); } -struct irq_domain * __init flipper_pic_init(struct device_node *np) +static struct irq_domain * __init flipper_pic_init(struct device_node *np) { struct device_node *pi; struct irq_domain *irq_domain = NULL; diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c index 7feb325b636b..5c7e7ce6dbab 100644 --- a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c +++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c @@ -169,7 +169,7 @@ static int ug_getc(void) /* * Transmits a character. 
*/ -void ug_udbg_putc(char ch) +static void ug_udbg_putc(char ch) { ug_putc(ch); } diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 3408f315ef48..fa89f30e7f27 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -492,7 +492,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) const u32 *psteps, *prate, *addrp; u32 steps; - host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL); + host = kzalloc(sizeof(*host), GFP_KERNEL); if (host == NULL) { printk(KERN_ERR "low_i2c: Can't allocate host for %pOF\n", np); diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c index df3c93bef228..e0462fedcdb8 100644 --- a/arch/powerpc/platforms/powermac/pfunc_core.c +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -643,7 +643,7 @@ static int pmf_add_function_prop(struct pmf_device *dev, void *driverdata, while (length >= 12) { /* Allocate a structure */ - func = kzalloc(sizeof(struct pmf_function), GFP_KERNEL); + func = kzalloc(sizeof(*func), GFP_KERNEL); if (func == NULL) goto bail; kref_init(&func->ref); @@ -719,7 +719,7 @@ int pmf_register_driver(struct device_node *np, return -EBUSY; } - dev = kzalloc(sizeof(struct pmf_device), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) { DBG("pmf: no memory !\n"); return -ENOMEM; diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 6c9d5199a7e2..703a350a7f4e 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -16,5 +16,4 @@ obj-$(CONFIG_OPAL_PRD) += opal-prd.o obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o -obj-$(CONFIG_PPC_FTW) += nx-ftw.o obj-$(CONFIG_OCXL_BASE) += ocxl.o diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 33c86c1a1720..ddfc3544d285 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1425,11 +1425,8 @@ static int pnv_eeh_get_pe(struct pci_controller *hose, dev_pe = dev_pe->parent; while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { int ret; - int active_flags = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE); - ret = eeh_ops->get_state(dev_pe, NULL); - if (ret <= 0 || (ret & active_flags) == active_flags) { + if (ret <= 0 || eeh_state_active(ret)) { dev_pe = dev_pe->parent; continue; } @@ -1463,7 +1460,6 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) struct eeh_pe *phb_pe, *parent_pe; __be64 frozen_pe_no; __be16 err_type, severity; - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); long rc; int state, ret = EEH_NEXT_ERR_NONE; @@ -1626,8 +1622,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) /* Frozen parent PE ? 
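The eeh-powernv.c hunks above fold the repeated open-coded test "(state & active_flags) == active_flags" into an eeh_state_active() helper. A minimal sketch of the equivalent predicate, with placeholder flag values (the real ones live in asm/eeh.h):

#include <stdbool.h>
#include <stdio.h>

#define EEH_STATE_MMIO_ACTIVE 0x04 /* placeholder value */
#define EEH_STATE_DMA_ACTIVE  0x08 /* placeholder value */

/* A PE counts as active only when both MMIO and DMA are enabled. */
static bool eeh_state_active(int state)
{
	int active = EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE;

	return (state & active) == active;
}

int main(void)
{
	printf("%d %d\n", eeh_state_active(0x0c), eeh_state_active(0x04)); /* 1 0 */
	return 0;
}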
*/ state = eeh_ops->get_state(parent_pe, NULL); - if (state > 0 && - (state & active_flags) != active_flags) + if (state > 0 && !eeh_state_active(state)) *pe = parent_pe; /* Next parent level */ diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 443d5ca71995..99a760eae964 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -24,6 +24,7 @@ #include <asm/code-patching.h> #include <asm/smp.h> #include <asm/runlatch.h> +#include <asm/dbell.h> #include "powernv.h" #include "subcore.h" @@ -387,6 +388,86 @@ void power9_idle(void) power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask); } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +/* + * This is used in working around bugs in thread reconfiguration + * on POWER9 (at least up to Nimbus DD2.2) relating to transactional + * memory and the way that XER[SO] is checkpointed. + * This function forces the core into SMT4 by asking + * all other threads not to stop, and sending a message to any + * that are in a stop state. + * Must be called with preemption disabled. + * + * DO NOT call this unless cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG) is + * true; otherwise this function will hang the system, due to the + * optimization in power9_idle_stop. + */ +void pnv_power9_force_smt4_catch(void) +{ + int cpu, cpu0, thr; + struct paca_struct *tpaca; + int awake_threads = 1; /* this thread is awake */ + int poke_threads = 0; + int need_awake = threads_per_core; + + cpu = smp_processor_id(); + cpu0 = cpu & ~(threads_per_core - 1); + tpaca = &paca[cpu0]; + for (thr = 0; thr < threads_per_core; ++thr) { + if (cpu != cpu0 + thr) + atomic_inc(&tpaca[thr].dont_stop); + } + /* order setting dont_stop vs testing requested_psscr */ + mb(); + for (thr = 0; thr < threads_per_core; ++thr) { + if (!tpaca[thr].requested_psscr) + ++awake_threads; + else + poke_threads |= (1 << thr); + } + + /* If at least 3 threads are awake, the core is in SMT4 already */ + if (awake_threads < need_awake) { + /* We have to wake some threads; we'll use msgsnd */ + for (thr = 0; thr < threads_per_core; ++thr) { + if (poke_threads & (1 << thr)) { + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, + tpaca[thr].hw_cpu_id); + } + } + /* now spin until at least 3 threads are awake */ + do { + for (thr = 0; thr < threads_per_core; ++thr) { + if ((poke_threads & (1 << thr)) && + !tpaca[thr].requested_psscr) { + ++awake_threads; + poke_threads &= ~(1 << thr); + } + } + } while (awake_threads < need_awake); + } +} +EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch); + +void pnv_power9_force_smt4_release(void) +{ + int cpu, cpu0, thr; + struct paca_struct *tpaca; + + cpu = smp_processor_id(); + cpu0 = cpu & ~(threads_per_core - 1); + tpaca = &paca[cpu0]; + + /* clear all the dont_stop flags */ + for (thr = 0; thr < threads_per_core; ++thr) { + if (cpu != cpu0 + thr) + atomic_dec(&tpaca[thr].dont_stop); + } +} +EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ + #ifdef CONFIG_HOTPLUG_CPU static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) { diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 0a253b64ac5f..69a4f9e8bd55 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -410,6 +410,11 @@ struct npu_context { void *priv; }; +struct mmio_atsd_reg { + struct npu *npu; + int reg; +}; + /* * Find a free MMIO ATSD register and mark it in use.
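pnv_power9_force_smt4_catch() above is a wake-and-wait loop: bump every sibling's dont_stop count, order those stores against the requested_psscr reads with a full barrier, doorbell any thread that is napping, then spin until enough siblings are awake. A rough userspace model of that control flow; C11 atomics stand in for the paca fields, and the msgsnd doorbell is modeled by clearing the target's requested_psscr:

#include <stdatomic.h>
#include <stdio.h>

#define THREADS_PER_CORE 4

struct thr_state {
	atomic_int dont_stop;        /* models paca->dont_stop */
	atomic_long requested_psscr; /* nonzero: thread asked to stop */
};

static struct thr_state core[THREADS_PER_CORE];

static void msgsnd_wake(int thr)     /* stand-in for ppc_msgsnd() */
{
	atomic_store(&core[thr].requested_psscr, 0);
}

static void force_smt4_catch(int self)
{
	int thr, awake_threads = 1, poke_threads = 0;
	int need_awake = THREADS_PER_CORE;

	for (thr = 0; thr < THREADS_PER_CORE; thr++)
		if (thr != self)
			atomic_fetch_add(&core[thr].dont_stop, 1);
	/* models the mb(): order dont_stop stores vs psscr loads */
	atomic_thread_fence(memory_order_seq_cst);

	for (thr = 0; thr < THREADS_PER_CORE; thr++) {
		if (!atomic_load(&core[thr].requested_psscr))
			++awake_threads; /* the caller is counted here too */
		else
			poke_threads |= 1 << thr;
	}

	if (awake_threads < need_awake) {
		for (thr = 0; thr < THREADS_PER_CORE; thr++)
			if (poke_threads & (1 << thr))
				msgsnd_wake(thr); /* kernel: msgsnd doorbell */
		do {
			for (thr = 0; thr < THREADS_PER_CORE; thr++) {
				if ((poke_threads & (1 << thr)) &&
				    !atomic_load(&core[thr].requested_psscr)) {
					++awake_threads;
					poke_threads &= ~(1 << thr);
				}
			}
		} while (awake_threads < need_awake);
	}
}

int main(void)
{
	atomic_store(&core[2].requested_psscr, 1); /* thread 2 is napping */
	force_smt4_catch(0);
	puts("core forced to SMT4");
	return 0;
}

Note that awake_threads starts at 1 and the scan then counts the calling thread again, which appears to be why need_awake is the full thread count (4) even though, as the kernel comment says, only 3 awake threads are required.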
Return -ENOSPC * if none are available. @@ -419,7 +424,7 @@ static int get_mmio_atsd_reg(struct npu *npu) int i; for (i = 0; i < npu->mmio_atsd_count; i++) { - if (!test_and_set_bit(i, &npu->mmio_atsd_usage)) + if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) return i; } @@ -428,86 +433,90 @@ static int get_mmio_atsd_reg(struct npu *npu) static void put_mmio_atsd_reg(struct npu *npu, int reg) { - clear_bit(reg, &npu->mmio_atsd_usage); + clear_bit_unlock(reg, &npu->mmio_atsd_usage); } /* MMIO ATSD register offsets */ #define XTS_ATSD_AVA 1 #define XTS_ATSD_STAT 2 -static int mmio_launch_invalidate(struct npu *npu, unsigned long launch, - unsigned long va) +static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg, + unsigned long launch, unsigned long va) { - int mmio_atsd_reg; - - do { - mmio_atsd_reg = get_mmio_atsd_reg(npu); - cpu_relax(); - } while (mmio_atsd_reg < 0); + struct npu *npu = mmio_atsd_reg->npu; + int reg = mmio_atsd_reg->reg; __raw_writeq(cpu_to_be64(va), - npu->mmio_atsd_regs[mmio_atsd_reg] + XTS_ATSD_AVA); + npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA); eieio(); - __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[mmio_atsd_reg]); - - return mmio_atsd_reg; + __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[reg]); } -static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) +static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], + unsigned long pid, bool flush) { + int i; unsigned long launch; - /* IS set to invalidate matching PID */ - launch = PPC_BIT(12); + for (i = 0; i <= max_npu2_index; i++) { + if (mmio_atsd_reg[i].reg < 0) + continue; + + /* IS set to invalidate matching PID */ + launch = PPC_BIT(12); - /* PRS set to process-scoped */ - launch |= PPC_BIT(13); + /* PRS set to process-scoped */ + launch |= PPC_BIT(13); - /* AP */ - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); + /* AP */ + launch |= (u64) + mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); - /* PID */ - launch |= pid << PPC_BITLSHIFT(38); + /* PID */ + launch |= pid << PPC_BITLSHIFT(38); - /* No flush */ - launch |= !flush << PPC_BITLSHIFT(39); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); - /* Invalidating the entire process doesn't use a va */ - return mmio_launch_invalidate(npu, launch, 0); + /* Invalidating the entire process doesn't use a va */ + mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0); + } } -static int mmio_invalidate_va(struct npu *npu, unsigned long va, - unsigned long pid, bool flush) +static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], + unsigned long va, unsigned long pid, bool flush) { + int i; unsigned long launch; - /* IS set to invalidate target VA */ - launch = 0; + for (i = 0; i <= max_npu2_index; i++) { + if (mmio_atsd_reg[i].reg < 0) + continue; - /* PRS set to process scoped */ - launch |= PPC_BIT(13); + /* IS set to invalidate target VA */ + launch = 0; - /* AP */ - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); + /* PRS set to process scoped */ + launch |= PPC_BIT(13); - /* PID */ - launch |= pid << PPC_BITLSHIFT(38); + /* AP */ + launch |= (u64) + mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); - /* No flush */ - launch |= !flush << PPC_BITLSHIFT(39); + /* PID */ + launch |= pid << PPC_BITLSHIFT(38); - return mmio_launch_invalidate(npu, launch, va); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); + + mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va); + } } #define mn_to_npu_context(x) 
container_of(x, struct npu_context, mn) -struct mmio_atsd_reg { - struct npu *npu; - int reg; -}; - static void mmio_invalidate_wait( - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush) + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) { struct npu *npu; int i, reg; @@ -522,16 +531,67 @@ static void mmio_invalidate_wait( reg = mmio_atsd_reg[i].reg; while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) cpu_relax(); + } +} - put_mmio_atsd_reg(npu, reg); +/* + * Acquires all the address translation shootdown (ATSD) registers required to + * launch an ATSD on all links this npu_context is active on. + */ +static void acquire_atsd_reg(struct npu_context *npu_context, + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) +{ + int i, j; + struct npu *npu; + struct pci_dev *npdev; + struct pnv_phb *nphb; + for (i = 0; i <= max_npu2_index; i++) { + mmio_atsd_reg[i].reg = -1; + for (j = 0; j < NV_MAX_LINKS; j++) { + /* + * There are no ordering requirements with respect to + * the setup of struct npu_context, but to ensure + * consistent behaviour we need to ensure npdev[][] is + * only read once. + */ + npdev = READ_ONCE(npu_context->npdev[i][j]); + if (!npdev) + continue; + + nphb = pci_bus_to_host(npdev->bus)->private_data; + npu = &nphb->npu; + mmio_atsd_reg[i].npu = npu; + mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); + while (mmio_atsd_reg[i].reg < 0) { + mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); + cpu_relax(); + } + break; + } + } +} + +/* + * Release previously acquired ATSD registers. To avoid deadlocks the registers + * must be released in the same order they were acquired above in + * acquire_atsd_reg. + */ +static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) +{ + int i; + + for (i = 0; i <= max_npu2_index; i++) { /* - * The GPU requires two flush ATSDs to ensure all entries have - * been flushed. We use PID 0 as it will never be used for a - * process on the GPU. + * We can't rely on npu_context->npdev[][] being the same here + * as when acquire_atsd_reg() was called, hence we use the + * values stored in mmio_atsd_reg during the acquire phase + * rather than re-reading npdev[][]. */ - if (flush) - mmio_invalidate_pid(npu, 0, true); + if (mmio_atsd_reg[i].reg < 0) + continue; + + put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg); } } @@ -542,10 +602,6 @@ static void mmio_invalidate_wait( static void mmio_invalidate(struct npu_context *npu_context, int va, unsigned long address, bool flush) { - int i, j; - struct npu *npu; - struct pnv_phb *nphb; - struct pci_dev *npdev; struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; unsigned long pid = npu_context->mm->context.id; @@ -561,37 +617,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, * Loop over all the NPUs this process is active on and launch * an invalidate. */ - for (i = 0; i <= max_npu2_index; i++) { - mmio_atsd_reg[i].reg = -1; - for (j = 0; j < NV_MAX_LINKS; j++) { - npdev = npu_context->npdev[i][j]; - if (!npdev) - continue; - - nphb = pci_bus_to_host(npdev->bus)->private_data; - npu = &nphb->npu; - mmio_atsd_reg[i].npu = npu; - - if (va) - mmio_atsd_reg[i].reg = - mmio_invalidate_va(npu, address, pid, - flush); - else - mmio_atsd_reg[i].reg = - mmio_invalidate_pid(npu, pid, flush); - - /* - * The NPU hardware forwards the shootdown to all GPUs - * so we only have to launch one shootdown per NPU. 
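acquire_atsd_reg() above always walks the NPUs in ascending index order and spins until a register frees up, and release_atsd_reg() frees them in that same order, per the comment. A rough userspace model of the per-NPU bitmap allocator and the ordered acquire/release; sizes and names are illustrative, with C11 acquire/release atomics standing in for test_and_set_bit_lock()/clear_bit_unlock():

#include <stdatomic.h>
#include <stdio.h>

#define MAX_NPUS      4 /* illustrative, not the kernel's NV_MAX_NPUS */
#define SLOTS_PER_NPU 8

static atomic_ulong slot_usage[MAX_NPUS];  /* models npu->mmio_atsd_usage */

static int get_slot(int npu)               /* models get_mmio_atsd_reg() */
{
	for (int i = 0; i < SLOTS_PER_NPU; i++) {
		unsigned long bit = 1UL << i;

		/* acquire semantics, like test_and_set_bit_lock() */
		if (!(atomic_fetch_or_explicit(&slot_usage[npu], bit,
					       memory_order_acquire) & bit))
			return i;
	}
	return -1;
}

static void put_slot(int npu, int i)       /* models put_mmio_atsd_reg() */
{
	/* release semantics, like clear_bit_unlock() */
	atomic_fetch_and_explicit(&slot_usage[npu], ~(1UL << i),
				  memory_order_release);
}

static void acquire_all(int regs[MAX_NPUS])
{
	for (int n = 0; n < MAX_NPUS; n++)     /* always ascending order */
		do {
			regs[n] = get_slot(n); /* spin until one frees up */
		} while (regs[n] < 0);
}

static void release_all(int regs[MAX_NPUS]) /* same order as acquired */
{
	for (int n = 0; n < MAX_NPUS; n++)
		if (regs[n] >= 0)
			put_slot(n, regs[n]);
}

int main(void)
{
	int regs[MAX_NPUS];

	acquire_all(regs);
	printf("npu0 got slot %d\n", regs[0]);
	release_all(regs);
	return 0;
}

Because every caller takes registers in the same global order, two concurrent invalidations can never each hold a register the other is spinning on, which is the standard ordering rule for avoiding deadlock on multiple resources.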
- */ - break; - } + acquire_atsd_reg(npu_context, mmio_atsd_reg); + if (va) + mmio_invalidate_va(mmio_atsd_reg, address, pid, flush); + else + mmio_invalidate_pid(mmio_atsd_reg, pid, flush); + + mmio_invalidate_wait(mmio_atsd_reg); + if (flush) { + /* + * The GPU requires two flush ATSDs to ensure all entries have + * been flushed. We use PID 0 as it will never be used for a + * process on the GPU. + */ + mmio_invalidate_pid(mmio_atsd_reg, 0, true); + mmio_invalidate_wait(mmio_atsd_reg); + mmio_invalidate_pid(mmio_atsd_reg, 0, true); + mmio_invalidate_wait(mmio_atsd_reg); } - - mmio_invalidate_wait(mmio_atsd_reg, flush); - if (flush) - /* Wait for the flush to complete */ - mmio_invalidate_wait(mmio_atsd_reg, false); + release_atsd_reg(mmio_atsd_reg); } static void pnv_npu2_mn_release(struct mmu_notifier *mn, @@ -680,6 +724,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, /* No nvlink associated with this GPU device */ return ERR_PTR(-ENODEV); + nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); + if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", + &nvlink_index))) + return ERR_PTR(-ENODEV); + if (!mm || mm->context.id == 0) { /* * Kernel thread contexts are not supported and context id 0 is @@ -707,26 +756,40 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, */ npu_context = mm->context.npu_context; if (!npu_context) { + rc = -ENOMEM; npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); - if (!npu_context) - return ERR_PTR(-ENOMEM); + if (npu_context) { + kref_init(&npu_context->kref); + npu_context->mm = mm; + npu_context->mn.ops = &nv_nmmu_notifier_ops; + rc = __mmu_notifier_register(&npu_context->mn, mm); + } + + if (rc) { + kfree(npu_context); + opal_npu_destroy_context(nphb->opal_id, mm->context.id, + PCI_DEVID(gpdev->bus->number, + gpdev->devfn)); + return ERR_PTR(rc); + } mm->context.npu_context = npu_context; - npu_context->mm = mm; - npu_context->mn.ops = &nv_nmmu_notifier_ops; - __mmu_notifier_register(&npu_context->mn, mm); - kref_init(&npu_context->kref); } else { - kref_get(&npu_context->kref); + WARN_ON(!kref_get_unless_zero(&npu_context->kref)); } npu_context->release_cb = cb; npu_context->priv = priv; - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", - &nvlink_index))) - return ERR_PTR(-ENODEV); - npu_context->npdev[npu->index][nvlink_index] = npdev; + + /* + * npdev is a pci_dev pointer setup by the PCI code. We assign it to + * npdev[][] to indicate to the mmu notifiers that an invalidation + * should also be sent over this nvlink. The notifiers don't use any + * other fields in npu_context, so we just need to ensure that when they + * dereference npu_context->npdev[][] it is either a valid pointer or + * NULL.
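The comment above describes a publication protocol: writers install or clear the pointer with a single WRITE_ONCE() store, and the notifier side reads it with a single READ_ONCE() load, so a concurrent reader sees either NULL or a fully valid pointer, never a torn or twice-read value. A minimal userspace model using C11 relaxed atomics in place of READ_ONCE()/WRITE_ONCE():

#include <stdatomic.h>
#include <stdio.h>

struct dev { int id; };

static _Atomic(struct dev *) npdev_slot; /* models npu_context->npdev[i][j] */

static void publish(struct dev *d)       /* models WRITE_ONCE() */
{
	atomic_store_explicit(&npdev_slot, d, memory_order_relaxed);
}

static int notifier_path(void)           /* models the invalidate path */
{
	/* single load, as with READ_ONCE(): NULL or a valid pointer */
	struct dev *d = atomic_load_explicit(&npdev_slot,
					     memory_order_relaxed);

	if (!d)
		return 0; /* link not active, skip it */
	return d->id;
}

int main(void)
{
	static struct dev d = { .id = 7 };

	printf("%d\n", notifier_path()); /* 0: not yet published */
	publish(&d);
	printf("%d\n", notifier_path()); /* 7 */
	return 0;
}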
+ */ + WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev); if (!nphb->npu.nmmu_flush) { /* @@ -778,7 +841,7 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context, if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", &nvlink_index))) return; - npu_context->npdev[npu->index][nvlink_index] = NULL; + WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, PCI_DEVID(gpdev->bus->number, gpdev->devfn)); kref_put(&npu_context->kref, pnv_npu2_release_context); diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index 2fa3ac80cb4e..1cb0b895a236 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -418,12 +418,12 @@ static int alloc_image_buf(char *buffer, size_t count) void *addr; int size; - if (count < sizeof(struct image_header_t)) { + if (count < sizeof(image_header)) { pr_warn("FLASH: Invalid candidate image\n"); return -EINVAL; } - memcpy(&image_header, (void *)buffer, sizeof(struct image_header_t)); + memcpy(&image_header, (void *)buffer, sizeof(image_header)); image_data.size = be32_to_cpu(image_header.size); pr_debug("FLASH: Candidate image size = %u\n", image_data.size); diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index c9e1a4ff295c..4efc95b4c7d4 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -314,7 +314,7 @@ static int opal_handle_hmi_event(struct notifier_block *nb, pr_err("HMI: out of memory, Opal message event not handled\n"); return -ENOMEM; } - memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent)); + memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(*hmi_evt)); spin_lock_irqsave(&opal_hmi_evt_lock, flags); list_add(&msg_node->list, &opal_hmi_evt_list); diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index f6f55ab4980e..2a14fda5ea26 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -110,11 +110,11 @@ static int imc_get_mem_addr_nest(struct device_node *node, if (nr_chips <= 0) return -ENODEV; - base_addr_arr = kcalloc(nr_chips, sizeof(u64), GFP_KERNEL); + base_addr_arr = kcalloc(nr_chips, sizeof(*base_addr_arr), GFP_KERNEL); if (!base_addr_arr) return -ENOMEM; - chipid_arr = kcalloc(nr_chips, sizeof(u32), GFP_KERNEL); + chipid_arr = kcalloc(nr_chips, sizeof(*chipid_arr), GFP_KERNEL); if (!chipid_arr) return -ENOMEM; @@ -125,8 +125,8 @@ static int imc_get_mem_addr_nest(struct device_node *node, nr_chips)) goto error; - pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(struct imc_mem_info), - GFP_KERNEL); + pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info), + GFP_KERNEL); if (!pmu_ptr->mem_info) goto error; @@ -161,7 +161,7 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) u32 offset; /* memory for pmu */ - pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL); + pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL); if (!pmu_ptr) return -ENOMEM; diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c index 8ddc1accf199..dcb42bcb5efa 100644 --- a/arch/powerpc/platforms/powernv/opal-memory-errors.c +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -112,7 +112,7 @@ static int opal_memory_err_event(struct notifier_block *nb, "handled\n"); 
return -ENOMEM; } - memcpy(&msg_node->msg, msg, sizeof(struct opal_msg)); + memcpy(&msg_node->msg, msg, sizeof(msg_node->msg)); spin_lock_irqsave(&opal_mem_err_lock, flags); list_add(&msg_node->list, &opal_memory_err_list); diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c index 7313b7fc9071..74986b35cf77 100644 --- a/arch/powerpc/platforms/powernv/opal-psr.c +++ b/arch/powerpc/platforms/powernv/opal-psr.c @@ -136,7 +136,7 @@ void __init opal_psr_init(void) return; } - psr_attrs = kcalloc(of_get_child_count(psr), sizeof(struct psr_attr), + psr_attrs = kcalloc(of_get_child_count(psr), sizeof(*psr_attrs), GFP_KERNEL); if (!psr_attrs) return; diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c index 7e5a235ebf76..541c9ea04a32 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c +++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c @@ -166,13 +166,13 @@ void __init opal_sensor_groups_init(void) if (!nr_attrs) continue; - sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(struct sg_attr), + sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(*sgs[i].sgattrs), GFP_KERNEL); if (!sgs[i].sgattrs) goto out_sgs_sgattrs; sgs[i].sg.attrs = kcalloc(nr_attrs + 1, - sizeof(struct attribute *), + sizeof(*sgs[i].sg.attrs), GFP_KERNEL); if (!sgs[i].sg.attrs) { diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 1b2936ba6040..3da30c2f26b4 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -323,3 +323,5 @@ OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR); OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP); OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE); OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET); +OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR); +OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR); diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c index 81c0a943dea9..22d5e1110dbb 100644 --- a/arch/powerpc/platforms/powernv/opal-xscom.c +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -46,7 +46,7 @@ static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count) __func__, dev); return SCOM_MAP_INVALID; } - m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL); + m = kmalloc(sizeof(*m), GFP_KERNEL); if (!m) return NULL; m->chip = be32_to_cpup(gcid); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index c15182765ff5..516e23de5a3d 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -490,9 +490,12 @@ void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) * opal to trigger checkstop explicitly for error analysis. * The FSP PRD component would have already got notified * about this error through other channels. + * 4. We are running on a newer skiboot that by default does + * not cause a checkstop but drops us back to the kernel to + * extract context and state at the time of the error.
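Several hunks above (opal-hmi.c, opal-memory-errors.c, and the kzalloc/kcalloc conversions throughout this series) switch size arguments from sizeof(struct foo) to sizeof(*ptr) or the destination object, so the size expression tracks the variable rather than a hardcoded type. A tiny standalone illustration of the idiom, with invented names:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct msg { long hdr; char body[48]; };
struct node { struct msg msg; struct node *next; };

static struct node *clone_msg(const struct msg *src)
{
	/* sizeof(*n) rather than sizeof(struct node): still correct if
	 * the variable's type is ever changed */
	struct node *n = calloc(1, sizeof(*n));

	if (!n)
		return NULL;
	/* size taken from the destination object, as in the hunks above */
	memcpy(&n->msg, src, sizeof(n->msg));
	return n;
}

int main(void)
{
	struct msg m = { .hdr = 1, .body = "hello" };
	struct node *n = clone_msg(&m);

	if (n)
		printf("%s\n", n->msg.body);
	free(n);
	return 0;
}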
*/ - ppc_md.restart(NULL); + panic(msg); } int opal_machine_check(struct pt_regs *regs) diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c index 94498a04558b..cee003de63af 100644 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -16,14 +16,6 @@ #include "pci.h" -struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - - return of_node_get(hose->dn); -} -EXPORT_SYMBOL(pnv_pci_get_phb_node); - int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) { struct pci_controller *hose = pci_bus_to_host(dev->bus); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 496e47696ed0..3f9c69d7623a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1854,7 +1854,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) s64 rc; if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) - return -ENODEV;; + return -ENODEV; pe = &phb->ioda.pe_array[pdn->pe_number]; if (pe->tce_bypass_enabled) { @@ -2681,14 +2681,23 @@ static struct pnv_ioda_pe *gpe_table_group_to_npe( static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group, int num, struct iommu_table *tbl) { + struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); + int num2 = (num == 0) ? 1 : 0; long ret = pnv_pci_ioda2_set_window(table_group, num, tbl); if (ret) return ret; - ret = pnv_npu_set_window(gpe_table_group_to_npe(table_group), num, tbl); - if (ret) + if (table_group->tables[num2]) + pnv_npu_unset_window(npe, num2); + + ret = pnv_npu_set_window(npe, num, tbl); + if (ret) { pnv_pci_ioda2_unset_window(table_group, num); + if (table_group->tables[num2]) + pnv_npu_set_window(npe, num2, + table_group->tables[num2]); + } return ret; } @@ -2697,12 +2706,24 @@ static long pnv_pci_ioda2_npu_unset_window( struct iommu_table_group *table_group, int num) { + struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); + int num2 = (num == 0) ? 
1 : 0; long ret = pnv_pci_ioda2_unset_window(table_group, num); if (ret) return ret; - return pnv_npu_unset_window(gpe_table_group_to_npe(table_group), num); + if (!npe->table_group.tables[num]) + return 0; + + ret = pnv_npu_unset_window(npe, num); + if (ret) + return ret; + + if (table_group->tables[num2]) + ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]); + + return ret; } static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group) @@ -3843,7 +3864,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); + phb = memblock_virt_alloc(sizeof(*phb), 0); /* Allocate PCI controller */ phb->hose = hose = pcibios_alloc_controller(np); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 69d102cbf48f..b265ecc0836a 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -18,6 +18,7 @@ #include <linux/io.h> #include <linux/msi.h> #include <linux/iommu.h> +#include <linux/sched/mm.h> #include <asm/sections.h> #include <asm/io.h> @@ -38,6 +39,7 @@ #include "pci.h" static DEFINE_MUTEX(p2p_mutex); +static DEFINE_MUTEX(tunnel_mutex); int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) { @@ -1092,6 +1094,139 @@ out: } EXPORT_SYMBOL_GPL(pnv_pci_set_p2p); +struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + return of_node_get(hose->dn); +} +EXPORT_SYMBOL(pnv_pci_get_phb_node); + +int pnv_pci_enable_tunnel(struct pci_dev *dev, u64 *asnind) +{ + struct device_node *np; + const __be32 *prop; + struct pnv_ioda_pe *pe; + uint16_t window_id; + int rc; + + if (!radix_enabled()) + return -ENXIO; + + if (!(np = pnv_pci_get_phb_node(dev))) + return -ENXIO; + + prop = of_get_property(np, "ibm,phb-indications", NULL); + of_node_put(np); + + if (!prop || !prop[1]) + return -ENXIO; + + *asnind = (u64)be32_to_cpu(prop[1]); + pe = pnv_ioda_get_pe(dev); + if (!pe) + return -ENODEV; + + /* Increase real window size to accept as_notify messages. */ + window_id = (pe->pe_number << 1 ) + 1; + rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, pe->pe_number, + window_id, pe->tce_bypass_base, + (uint64_t)1 << 48); + return opal_error_code(rc); +} +EXPORT_SYMBOL_GPL(pnv_pci_enable_tunnel); + +int pnv_pci_disable_tunnel(struct pci_dev *dev) +{ + struct pnv_ioda_pe *pe; + + pe = pnv_ioda_get_pe(dev); + if (!pe) + return -ENODEV; + + /* Restore default real window size. */ + pnv_pci_ioda2_set_bypass(pe, true); + return 0; +} +EXPORT_SYMBOL_GPL(pnv_pci_disable_tunnel); + +int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) +{ + __be64 val; + struct pci_controller *hose; + struct pnv_phb *phb; + u64 tunnel_bar; + int rc; + + if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR)) + return -ENXIO; + if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR)) + return -ENXIO; + + hose = pci_bus_to_host(dev->bus); + phb = hose->private_data; + + mutex_lock(&tunnel_mutex); + rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val); + if (rc != OPAL_SUCCESS) { + rc = -EIO; + goto out; + } + tunnel_bar = be64_to_cpu(val); + if (enable) { + /* + * Only one device per PHB can use atomics. + * Our policy is first-come, first-served. 
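pnv_pci_set_tunnel_bar() above (its body continues just below) serializes on tunnel_mutex and implements exactly the policy the comment states: the first claimant wins, re-claiming the same address is idempotent, and only the current owner may release. A userspace model of that ownership rule with a pthread mutex:

#include <errno.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t bar_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t bar_owner; /* 0 = unowned; models the PBCQ tunnel BAR */

static int set_tunnel_bar(uint64_t addr, int enable)
{
	int rc = 0;

	pthread_mutex_lock(&bar_lock);
	if (enable) {
		if (bar_owner && bar_owner != addr)
			rc = -EBUSY;      /* another device owns it */
		else
			bar_owner = addr; /* first claim, or same addr: ok */
	} else {
		if (bar_owner != addr)
			rc = -EPERM;      /* only the owner may release */
		else
			bar_owner = 0;
	}
	pthread_mutex_unlock(&bar_lock);
	return rc;
}

int main(void)
{
	printf("%d\n", set_tunnel_bar(0x1000, 1)); /* 0: claimed */
	printf("%d\n", set_tunnel_bar(0x2000, 1)); /* -EBUSY */
	printf("%d\n", set_tunnel_bar(0x1000, 0)); /* 0: released */
	return 0;
}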
+ */ + if (tunnel_bar) { + if (tunnel_bar != addr) + rc = -EBUSY; + else + rc = 0; /* Setting same address twice is ok */ + goto out; + } + } else { + /* + * The device that owns atomics and wants to release + * them must pass the same address with enable == 0. + */ + if (tunnel_bar != addr) { + rc = -EPERM; + goto out; + } + addr = 0x0ULL; + } + rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id, addr); + rc = opal_error_code(rc); +out: + mutex_unlock(&tunnel_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar); + +#ifdef CONFIG_PPC64 /* for thread.tidr */ +int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, u32 *pid, + u32 *tid) +{ + struct mm_struct *mm = NULL; + + if (task == NULL) + return -EINVAL; + + mm = get_task_mm(task); + if (mm == NULL) + return -EINVAL; + + *pid = mm->context.id; + mmput(mm); + + *tid = task->thread.tidr; + *lpid = mfspr(SPRN_LPID); + return 0; +} +EXPORT_SYMBOL_GPL(pnv_pci_get_as_notify_info); +#endif + void pnv_pci_shutdown(void) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 092715b9674b..7de050a3736b 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -38,57 +38,92 @@ #include <asm/smp.h> #include <asm/tm.h> #include <asm/setup.h> +#include <asm/security_features.h> #include "powernv.h" + +static bool fw_feature_is(const char *state, const char *name, + struct device_node *fw_features) +{ + struct device_node *np; + bool rc = false; + + np = of_get_child_by_name(fw_features, name); + if (np) { + rc = of_property_read_bool(np, state); + of_node_put(np); + } + + return rc; +} + +static void init_fw_feat_flags(struct device_node *np) +{ + if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np)) + security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); + + if (fw_feature_is("enabled", "fw-bcctrl-serialized", np)) + security_ftr_set(SEC_FTR_BCCTRL_SERIALISED); + + if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np)) + security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30); + + if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np)) + security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2); + + if (fw_feature_is("enabled", "fw-l1d-thread-split", np)) + security_ftr_set(SEC_FTR_L1D_THREAD_PRIV); + + if (fw_feature_is("enabled", "fw-count-cache-disabled", np)) + security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + + /* + * The features below are enabled by default, so we instead look to see + * if firmware has *disabled* them, and clear them if so. 
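fw_feature_is() above looks up a named child of the fw-features device-tree node and tests whether it carries the requested state property; a missing node yields false for both "enabled" and "disabled", which is what lets the default-on features below stay set unless firmware explicitly disables them. A userspace sketch of that lookup, with a toy table standing in for the device tree (the of_* API is kernel-only):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct fw_feature { const char *name; const char *state; };

/* Toy stand-in for the "fw-features" node and its children */
static const struct fw_feature fw_features[] = {
	{ "inst-l1d-flush-trig2",              "enabled"  },
	{ "speculation-policy-favor-security", "disabled" },
	{ NULL, NULL }
};

static bool fw_feature_is(const char *state, const char *name)
{
	for (const struct fw_feature *f = fw_features; f->name; f++)
		if (strcmp(f->name, name) == 0)
			return strcmp(f->state, state) == 0;
	return false; /* absent node: neither "enabled" nor "disabled" */
}

int main(void)
{
	printf("%d\n", fw_feature_is("enabled", "inst-l1d-flush-trig2"));  /* 1 */
	printf("%d\n", fw_feature_is("disabled", "fw-l1d-thread-split"));  /* 0 */
	return 0;
}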
+ */ + if (fw_feature_is("disabled", "speculation-policy-favor-security", np)) + security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); + + if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + + if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_HV); + + if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np)) + security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); +} + static void pnv_setup_rfi_flush(void) { struct device_node *np, *fw_features; enum l1d_flush_type type; - int enable; + bool enable; /* Default to fallback in case fw-features are not available */ type = L1D_FLUSH_FALLBACK; - enable = 1; np = of_find_node_by_name(NULL, "ibm,opal"); fw_features = of_get_child_by_name(np, "fw-features"); of_node_put(np); if (fw_features) { - np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); - if (np && of_property_read_bool(np, "enabled")) - type = L1D_FLUSH_MTTRIG; + init_fw_feat_flags(fw_features); + of_node_put(fw_features); - of_node_put(np); + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2)) + type = L1D_FLUSH_MTTRIG; - np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); - if (np && of_property_read_bool(np, "enabled")) + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30)) type = L1D_FLUSH_ORI; - - of_node_put(np); - - /* Enable unless firmware says NOT to */ - enable = 2; - np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); - if (np && of_property_read_bool(np, "disabled")) - enable--; - - of_node_put(np); - - np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); - if (np && of_property_read_bool(np, "disabled")) - enable--; - - np = of_get_child_by_name(fw_features, "speculation-policy-favor-security"); - if (np && of_property_read_bool(np, "disabled")) - enable = 0; - - of_node_put(np); - of_node_put(fw_features); } - setup_rfi_flush(type, enable > 0); + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ + security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); + + setup_rfi_flush(type, enable); } static void __init pnv_setup_arch(void) diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c index ca22f1eae050..4f7276ebdf9c 100644 --- a/arch/powerpc/platforms/powernv/vas-debug.c +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -166,19 +166,20 @@ void vas_window_init_dbgdir(struct vas_window *window) return; -free_name: - kfree(window->dbgname); - window->dbgname = NULL; - remove_dir: debugfs_remove_recursive(window->dbgdir); window->dbgdir = NULL; + +free_name: + kfree(window->dbgname); + window->dbgname = NULL; } void vas_instance_init_dbgdir(struct vas_instance *vinst) { struct dentry *d; + vas_init_dbgdir(); if (!vas_debugfs) return; @@ -201,8 +202,18 @@ free_name: vinst->dbgdir = NULL; } +/* + * Set up the "root" VAS debugfs dir. Return if we already set it up + * (or failed to) in an earlier instance of VAS. 
+ */ void vas_init_dbgdir(void) { + static bool first_time = true; + + if (!first_time) + return; + + first_time = false; vas_debugfs = debugfs_create_dir("vas", NULL); if (IS_ERR(vas_debugfs)) vas_debugfs = NULL; diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h new file mode 100644 index 000000000000..a449b9f0c12e --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-trace.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vas + +#if !defined(_VAS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) + +#define _VAS_TRACE_H +#include <linux/tracepoint.h> +#include <linux/sched.h> +#include <asm/vas.h> + +TRACE_EVENT( vas_rx_win_open, + + TP_PROTO(struct task_struct *tsk, + int vasid, + int cop, + struct vas_rx_win_attr *rxattr), + + TP_ARGS(tsk, vasid, cop, rxattr), + + TP_STRUCT__entry( + __field(struct task_struct *, tsk) + __field(int, pid) + __field(int, cop) + __field(int, vasid) + __field(struct vas_rx_win_attr *, rxattr) + __field(int, lnotify_lpid) + __field(int, lnotify_pid) + __field(int, lnotify_tid) + ), + + TP_fast_assign( + __entry->pid = tsk->pid; + __entry->vasid = vasid; + __entry->cop = cop; + __entry->lnotify_lpid = rxattr->lnotify_lpid; + __entry->lnotify_pid = rxattr->lnotify_pid; + __entry->lnotify_tid = rxattr->lnotify_tid; + ), + + TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pid=%d, tid=%d", + __entry->pid, __entry->vasid, __entry->cop, + __entry->lnotify_lpid, __entry->lnotify_pid, + __entry->lnotify_tid) +); + +TRACE_EVENT( vas_tx_win_open, + + TP_PROTO(struct task_struct *tsk, + int vasid, + int cop, + struct vas_tx_win_attr *txattr), + + TP_ARGS(tsk, vasid, cop, txattr), + + TP_STRUCT__entry( + __field(struct task_struct *, tsk) + __field(int, pid) + __field(int, cop) + __field(int, vasid) + __field(struct vas_tx_win_attr *, txattr) + __field(int, lpid) + __field(int, pidr) + ), + + TP_fast_assign( + __entry->pid = tsk->pid; + __entry->vasid = vasid; + __entry->cop = cop; + __entry->lpid = txattr->lpid; + __entry->pidr = txattr->pidr; + ), + + TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pidr=%d", + __entry->pid, __entry->vasid, __entry->cop, + __entry->lpid, __entry->pidr) +); + +TRACE_EVENT( vas_paste_crb, + + TP_PROTO(struct task_struct *tsk, + struct vas_window *win), + + TP_ARGS(tsk, win), + + TP_STRUCT__entry( + __field(struct task_struct *, tsk) + __field(struct vas_window *, win) + __field(int, pid) + __field(int, vasid) + __field(int, winid) + __field(unsigned long, paste_kaddr) + ), + + TP_fast_assign( + __entry->pid = tsk->pid; + __entry->vasid = win->vinst->vas_id; + __entry->winid = win->winid; + __entry->paste_kaddr = (unsigned long)win->paste_kaddr + ), + + TP_printk("pid=%d, vasid=%d, winid=%d, paste_kaddr=0x%016lx\n", + __entry->pid, __entry->vasid, __entry->winid, + __entry->paste_kaddr) +); + +#endif /* _VAS_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../arch/powerpc/platforms/powernv +#define TRACE_INCLUDE_FILE vas-trace +#include <trace/define_trace.h> diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index b7c53a51c31b..ff9f48812331 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -21,6 +21,9 @@ #include "vas.h" #include "copy-paste.h" +#define CREATE_TRACE_POINTS +#include "vas-trace.h" + /* * Compute the paste address region for the window @window using the * ->paste_base_addr and 
->paste_win_id_shift we got from device tree. @@ -880,6 +883,8 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, struct vas_winctx winctx; struct vas_instance *vinst; + trace_vas_rx_win_open(current, vasid, cop, rxattr); + if (!rx_win_args_valid(cop, rxattr)) return ERR_PTR(-EINVAL); @@ -1008,6 +1013,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, struct vas_winctx winctx; struct vas_instance *vinst; + trace_vas_tx_win_open(current, vasid, cop, attr); + if (!tx_win_args_valid(cop, attr)) return ERR_PTR(-EINVAL); @@ -1100,6 +1107,8 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re) void *addr; uint64_t val; + trace_vas_paste_crb(current, txwin); + /* * Only NX windows are supported for now and hardware assumes * report-enable flag is set for NX windows. Ensure software diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index aebbe95c9230..5a2b24cbbc88 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -160,8 +160,6 @@ static int __init vas_init(void) int found = 0; struct device_node *dn; - vas_init_dbgdir(); - platform_driver_register(&vas_driver); for_each_compatible_node(dn, NULL, "ibm,vas") { @@ -169,8 +167,10 @@ static int __init vas_init(void) found++; } - if (!found) + if (!found) { + platform_driver_unregister(&vas_driver); return -ENODEV; + } pr_devel("Found %d instances\n", found); diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 7f870ec29daf..8c7009d001d9 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -524,8 +524,7 @@ static int dma_sb_map_pages(struct ps3_dma_region *r, unsigned long phys_addr, int result; struct dma_chunk *c; - c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC); - + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) { result = -ENOMEM; goto fail_alloc; @@ -570,8 +569,7 @@ static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr, DBG(KERN_ERR "%s: phy=%#lx, lpar%#lx, len=%#lx\n", __func__, phys_addr, ps3_mm_phys_to_lpar(phys_addr), len); - c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC); - + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) { result = -ENOMEM; goto fail_alloc; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 0ee4a469a4ae..238b55fb8007 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -306,14 +306,14 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, want_v = hpte_encode_avpn(vpn, psize, ssize); - pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", - want_v, slot, flags, psize); - flags = (newpp & 7) | H_AVPN; if (mmu_has_feature(MMU_FTR_KERNEL_RO)) /* Move pp0 into bit 8 (IBM 55) */ flags |= (newpp & HPTE_R_PP0) >> 55; + pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", + want_v, slot, flags, psize); + lpar_rc = plpar_pte_protect(flags, slot, want_v); if (lpar_rc == H_NOT_FOUND) { @@ -726,15 +726,18 @@ static int pseries_lpar_resize_hpt(unsigned long shift) return 0; } -/* Actually only used for radix, so far */ static int pseries_lpar_register_process_table(unsigned long base, unsigned long page_size, unsigned long table_size) { long rc; - unsigned long flags = PROC_TABLE_NEW; + unsigned long flags = 0; + if (table_size) + flags |= PROC_TABLE_NEW; if (radix_enabled()) flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE; + else + flags |= PROC_TABLE_HPT_SLB; for (;;) { rc = 
plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, page_size, table_size); @@ -760,6 +763,7 @@ void __init hpte_init_pseries(void) mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; + register_process_table = pseries_lpar_register_process_table; if (firmware_has_feature(FW_FEATURE_HPT_RESIZE)) mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 0f7fb7170b03..8a8033a249c7 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -348,6 +348,9 @@ void post_mobility_fixup(void) printk(KERN_ERR "Post-mobility device tree update " "failed: %d\n", rc); + /* Possibly switch to a new RFI flush type */ + pseries_setup_rfi_flush(); + return; } diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 1ae1d9f4dbe9..60db2ee511fb 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -27,6 +27,14 @@ extern int pSeries_machine_check_exception(struct pt_regs *regs); #ifdef CONFIG_SMP extern void smp_init_pseries(void); + +/* Get state of physical CPU from query_cpu_stopped */ +int smp_query_cpu_stopped(unsigned int pcpu); +#define QCSS_STOPPED 0 +#define QCSS_STOPPING 1 +#define QCSS_NOT_STOPPED 2 +#define QCSS_HARDWARE_ERROR -1 +#define QCSS_HARDWARE_BUSY -2 #else static inline void smp_init_pseries(void) { }; #endif @@ -100,4 +108,6 @@ static inline unsigned long cmo_get_page_size(void) int dlpar_workqueue_init(void); +void pseries_setup_rfi_flush(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 1a527625acf7..1f122359cd8f 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -68,6 +68,7 @@ #include <asm/plpar_wrappers.h> #include <asm/kexec.h> #include <asm/isa-bridge.h> +#include <asm/security_features.h> #include "pseries.h" @@ -459,36 +460,67 @@ static void __init find_and_init_phbs(void) of_pci_check_probe_only(); } -static void pseries_setup_rfi_flush(void) +static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) +{ + if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31) + security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); + + if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED) + security_ftr_set(SEC_FTR_BCCTRL_SERIALISED); + + if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30) + security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30); + + if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2) + security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2); + + if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV) + security_ftr_set(SEC_FTR_L1D_THREAD_PRIV); + + if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED) + security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + + /* + * The features below are enabled by default, so we instead look to see + * if firmware has *disabled* them, and clear them if so. 
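init_cpu_char_feature_flags() above follows a set-then-clear pattern: features that default to off are set when firmware advertises them, while the default-on behaviours are only cleared when firmware explicitly reports them disabled, so a failed or missing hcall leaves the safe defaults in place. A compact model with placeholder bit values (the real constants are in asm/hvcall.h and asm/security_features.h), taking the character and behaviour words as separate inputs:

#include <stdint.h>
#include <stdio.h>

#define CHAR_L1D_FLUSH_TRIG2  0x01 /* placeholder */
#define BEHAV_FAVOUR_SECURITY 0x02 /* placeholder */
#define FTR_L1D_FLUSH_TRIG2   0x01 /* placeholder */
#define FTR_FAVOUR_SECURITY   0x02 /* enabled by default */

static uint64_t security_ftrs = FTR_FAVOUR_SECURITY; /* default-on set */

static void init_feature_flags(uint64_t character, uint64_t behaviour)
{
	/* off-by-default capabilities: set when firmware reports them */
	if (character & CHAR_L1D_FLUSH_TRIG2)
		security_ftrs |= FTR_L1D_FLUSH_TRIG2;

	/* default-on behaviours: clear only when firmware disabled them */
	if (!(behaviour & BEHAV_FAVOUR_SECURITY))
		security_ftrs &= ~FTR_FAVOUR_SECURITY;
}

int main(void)
{
	init_feature_flags(CHAR_L1D_FLUSH_TRIG2, BEHAV_FAVOUR_SECURITY);
	printf("ftrs=%#llx\n", (unsigned long long)security_ftrs);
	return 0;
}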
+ */ + if (!(result->character & H_CPU_BEHAV_FAVOUR_SECURITY)) + security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); + + if (!(result->character & H_CPU_BEHAV_L1D_FLUSH_PR)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + + if (!(result->character & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR)) + security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); +} + +void pseries_setup_rfi_flush(void) { struct h_cpu_char_result result; enum l1d_flush_type types; bool enable; long rc; - /* Enable by default */ - enable = true; - rc = plpar_get_cpu_characteristics(&result); - if (rc == H_SUCCESS) { - types = L1D_FLUSH_NONE; + if (rc == H_SUCCESS) + init_cpu_char_feature_flags(&result); + + /* + * We're the guest so this doesn't apply to us, clear it to simplify + * handling of it elsewhere. + */ + security_ftr_clear(SEC_FTR_L1D_FLUSH_HV); - if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) - types |= L1D_FLUSH_MTTRIG; - if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) - types |= L1D_FLUSH_ORI; + types = L1D_FLUSH_FALLBACK; - /* Use fallback if nothing set in hcall */ - if (types == L1D_FLUSH_NONE) - types = L1D_FLUSH_FALLBACK; + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2)) + types |= L1D_FLUSH_MTTRIG; - if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) || - (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))) - enable = false; - } else { - /* Default to fallback if case hcall is not available */ - types = L1D_FLUSH_FALLBACK; - } + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30)) + types |= L1D_FLUSH_ORI; + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ + security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR); setup_rfi_flush(types, enable); } @@ -739,7 +771,7 @@ static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx) /* PAPR says we can't set HYP */ dawrx &= ~DAWRX_HYP; - return plapr_set_watchpoint0(dawr, dawrx); + return plpar_set_watchpoint0(dawr, dawrx); } #define CMO_CHARACTERISTICS_TOKEN 44 diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 2e184829e5d4..66b6f119d599 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -215,7 +215,7 @@ static int pseries_cause_nmi_ipi(int cpu) hwcpu = get_hard_smp_processor_id(cpu); } - if (plapr_signal_sys_reset(hwcpu) == H_SUCCESS) + if (plpar_signal_sys_reset(hwcpu) == H_SUCCESS) return 1; return 0; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 82e1a3ee6e0f..53918023622e 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -41,6 +41,7 @@ #include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> +#include <asm/plpar_wrappers.h> #include <asm/cputable.h> #include <asm/rtas.h> #include <asm/sstep.h> @@ -61,12 +62,6 @@ #include <asm/paca.h> #endif -#if defined(CONFIG_PPC_SPLPAR) -#include <asm/plpar_wrappers.h> -#else -static inline long plapr_set_ciabr(unsigned long ciabr) {return 0; }; -#endif - #include "nonstdio.h" #include "dis-asm.h" @@ -328,7 +323,7 @@ static void write_ciabr(unsigned long ciabr) mtspr(SPRN_CIABR, ciabr); return; } - plapr_set_ciabr(ciabr); + plpar_set_ciabr(ciabr); } /** @@ -1273,6 +1268,16 @@ static long check_bp_loc(unsigned long addr) return 1; } +/* Force enable xmon if not already enabled */ +static inline void force_enable_xmon(void) +{ + /* Enable xmon hooks if needed */ + if (!xmon_on) { + printf("xmon: Enabling debugger hooks\n"); + xmon_on = 1; + } +} + static char *breakpoint_help_string = "Breakpoint command usage:\n" "b show breakpoints\n" @@ -1297,6 +1302,10 @@ bpt_cmds(void) 
static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; int mode; case 'd': /* bd - hardware data breakpoint */ + if (!ppc_breakpoint_available()) { + printf("Hardware data breakpoint not supported on this cpu\n"); + break; + } mode = 7; cmd = inchar(); if (cmd == 'r') @@ -1315,6 +1324,8 @@ bpt_cmds(void) dabr.address &= ~HW_BRK_TYPE_DABR; dabr.enabled = mode | BP_DABR; } + + force_enable_xmon(); break; case 'i': /* bi - hardware instr breakpoint */ @@ -1335,6 +1346,7 @@ bpt_cmds(void) if (bp != NULL) { bp->enabled |= BP_CIABR; iabr = bp; + force_enable_xmon(); } break; #endif @@ -1399,8 +1411,10 @@ bpt_cmds(void) if (!check_bp_loc(a)) break; bp = new_breakpoint(a); - if (bp != NULL) + if (bp != NULL) { bp->enabled |= BP_TRAP; + force_enable_xmon(); + } break; } } @@ -3649,11 +3663,35 @@ device_initcall(setup_xmon_sysrq); #endif /* CONFIG_MAGIC_SYSRQ */ #ifdef CONFIG_DEBUG_FS +static void clear_all_bpt(void) +{ + int i; + + /* clear/unpatch all breakpoints */ + remove_bpts(); + remove_cpu_bpts(); + + /* Disable all breakpoints */ + for (i = 0; i < NBPTS; ++i) + bpts[i].enabled = 0; + + /* Clear any data or iabr breakpoints */ + if (iabr || dabr.enabled) { + iabr = NULL; + dabr.enabled = 0; + } + + printf("xmon: All breakpoints cleared\n"); +} + static int xmon_dbgfs_set(void *data, u64 val) { xmon_on = !!val; xmon_init(xmon_on); + /* make sure all breakpoints removed when disabling */ + if (!xmon_on) + clear_all_bpt(); return 0; }
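The debugfs toggle above pairs the xmon on/off switch with breakpoint state: switching the debugger off also clears every breakpoint so nothing can fire while it is disabled. A small standalone model of that invariant, with the breakpoint bookkeeping simplified:

#include <stdbool.h>
#include <stdio.h>

#define NBPTS 4
static int bpts[NBPTS]; /* nonzero = enabled; stands in for struct bpt */
static bool xmon_on;

static void clear_all_bpt(void)
{
	for (int i = 0; i < NBPTS; i++)
		bpts[i] = 0;
	puts("xmon: All breakpoints cleared");
}

static int xmon_dbgfs_set(unsigned long long val)
{
	xmon_on = !!val;
	if (!xmon_on) /* nothing may fire while the debugger is off */
		clear_all_bpt();
	return 0;
}

int main(void)
{
	bpts[1] = 1;
	xmon_dbgfs_set(0); /* disabling clears every breakpoint */
	return 0;
}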