diff options
233 files changed, 5676 insertions, 1658 deletions
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 4bbeca8483ed..4227ec2e3ab2 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -300,6 +300,7 @@ architectures: - arm - ppc - mips +- s390 3. Configuring Kprobes diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index afe68ddbe6a4..052ee643a32e 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -5,7 +5,7 @@ Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension -ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole +ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space diff --git a/arch/Kconfig b/arch/Kconfig index 0eae9df35b88..05d7a8a458d5 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -323,6 +323,17 @@ config HAVE_ARCH_SECCOMP_FILTER results in the system call being skipped immediately. - seccomp syscall wired up + For best performance, an arch should use seccomp_phase1 and + seccomp_phase2 directly. It should call seccomp_phase1 for all + syscalls if TIF_SECCOMP is set, but seccomp_phase1 does not + need to be called from a ptrace-safe context. It must then + call seccomp_phase2 if seccomp_phase1 returns anything other + than SECCOMP_PHASE1_OK or SECCOMP_PHASE1_SKIP. + + As an additional optimization, an arch may provide seccomp_data + directly to seccomp_phase1; this avoids multiple calls + to the syscall_xyz helpers for every syscall. + config SECCOMP_FILTER def_bool y depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 0c27ed6f3f23..5e772a21ab97 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -933,8 +933,13 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) current_thread_info()->syscall = scno; /* Do the secure computing check first; failures should be fast. */ - if (secure_computing(scno) == -1) +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER + if (secure_computing() == -1) return -1; +#else + /* XXX: remove this once OABI gets fixed */ + secure_computing_strict(scno); +#endif if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 10a14ead70b9..f3b51b57740a 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -11,7 +11,7 @@ -#define NR_syscalls 317 /* length of syscall table */ +#define NR_syscalls 318 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h index 18026b2eb582..4c2240c1b0cb 100644 --- a/arch/ia64/include/uapi/asm/unistd.h +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -330,5 +330,6 @@ #define __NR_renameat2 1338 #define __NR_getrandom 1339 #define __NR_memfd_create 1340 +#define __NR_bpf 1341 #endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 01edf242eb29..f5e96dffc63c 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1778,6 +1778,7 @@ sys_call_table: data8 sys_renameat2 data8 sys_getrandom data8 sys_memfd_create // 1340 + data8 sys_bpf .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/m68k/platform/68000/Makefile b/arch/m68k/68000/Makefile index 1eab70c7194b..1eab70c7194b 100644 --- a/arch/m68k/platform/68000/Makefile +++ b/arch/m68k/68000/Makefile diff --git a/arch/m68k/platform/68000/bootlogo-vz.h b/arch/m68k/68000/bootlogo-vz.h index b38e2b255142..b38e2b255142 100644 --- a/arch/m68k/platform/68000/bootlogo-vz.h +++ b/arch/m68k/68000/bootlogo-vz.h diff --git a/arch/m68k/platform/68000/bootlogo.h b/arch/m68k/68000/bootlogo.h index b896c933fafc..b896c933fafc 100644 --- a/arch/m68k/platform/68000/bootlogo.h +++ b/arch/m68k/68000/bootlogo.h diff --git a/arch/m68k/platform/68000/entry.S b/arch/m68k/68000/entry.S index 23ac054c6e1a..259b3661b614 100644 --- a/arch/m68k/platform/68000/entry.S +++ b/arch/m68k/68000/entry.S @@ -1,5 +1,5 @@ /* - * linux/arch/m68knommu/platform/68328/entry.S + * entry.S -- non-mmu 68000 interrupt and exception entry points * * Copyright (C) 1991, 1992 Linus Torvalds * diff --git a/arch/m68k/platform/68000/head.S b/arch/m68k/68000/head.S index 536ef9616dad..536ef9616dad 100644 --- a/arch/m68k/platform/68000/head.S +++ b/arch/m68k/68000/head.S diff --git a/arch/m68k/platform/68000/ints.c b/arch/m68k/68000/ints.c index cda49b12d7be..cda49b12d7be 100644 --- a/arch/m68k/platform/68000/ints.c +++ b/arch/m68k/68000/ints.c diff --git a/arch/m68k/platform/68000/m68328.c b/arch/m68k/68000/m68328.c index e53caf4c3bfb..e53caf4c3bfb 100644 --- a/arch/m68k/platform/68000/m68328.c +++ b/arch/m68k/68000/m68328.c diff --git a/arch/m68k/platform/68000/m68EZ328.c b/arch/m68k/68000/m68EZ328.c index 21952906e9e2..21952906e9e2 100644 --- a/arch/m68k/platform/68000/m68EZ328.c +++ b/arch/m68k/68000/m68EZ328.c diff --git a/arch/m68k/platform/68000/m68VZ328.c b/arch/m68k/68000/m68VZ328.c index 0e5e5a10a021..0e5e5a10a021 100644 --- a/arch/m68k/platform/68000/m68VZ328.c +++ b/arch/m68k/68000/m68VZ328.c diff --git a/arch/m68k/platform/68000/romvec.S b/arch/m68k/68000/romvec.S index 15c70cd6453f..15c70cd6453f 100644 --- a/arch/m68k/platform/68000/romvec.S +++ b/arch/m68k/68000/romvec.S diff --git a/arch/m68k/platform/68000/timers.c b/arch/m68k/68000/timers.c index 99a98698bc95..99a98698bc95 100644 --- a/arch/m68k/platform/68000/timers.c +++ b/arch/m68k/68000/timers.c diff --git a/arch/m68k/platform/68360/Makefile b/arch/m68k/68360/Makefile index f6f434383049..591ce42df3de 100644 --- a/arch/m68k/platform/68360/Makefile +++ b/arch/m68k/68360/Makefile @@ -1,5 +1,5 @@ # -# Makefile for arch/m68knommu/platform/68360. +# Makefile for 68360 machines. # model-y := ram model-$(CONFIG_ROMKERNEL) := rom diff --git a/arch/m68k/platform/68360/commproc.c b/arch/m68k/68360/commproc.c index 315727b7ff40..315727b7ff40 100644 --- a/arch/m68k/platform/68360/commproc.c +++ b/arch/m68k/68360/commproc.c diff --git a/arch/m68k/platform/68360/config.c b/arch/m68k/68360/config.c index d493ac43fe3f..17ec416fed9d 100644 --- a/arch/m68k/platform/68360/config.c +++ b/arch/m68k/68360/config.c @@ -1,5 +1,5 @@ /* - * linux/arch/m68knommu/platform/68360/config.c + * config.c - non-mmu 68360 platform initialization code * * Copyright (c) 2000 Michael Leslie <mleslie@lineo.com> * Copyright (C) 1993 Hamish Macdonald diff --git a/arch/m68k/platform/68360/entry.S b/arch/m68k/68360/entry.S index 447c33ef37fd..22eb3022f9ee 100644 --- a/arch/m68k/platform/68360/entry.S +++ b/arch/m68k/68360/entry.S @@ -1,5 +1,5 @@ /* - * linux/arch/m68knommu/platform/68360/entry.S + * entry.S - non-mmu 68360 interrupt and exceptions entry points * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2001 SED Systems, a Division of Calian Ltd. diff --git a/arch/m68k/platform/68360/head-ram.S b/arch/m68k/68360/head-ram.S index acd213170d80..62bc56f41d57 100644 --- a/arch/m68k/platform/68360/head-ram.S +++ b/arch/m68k/68360/head-ram.S @@ -1,6 +1,5 @@ -/* arch/m68knommu/platform/68360/head-ram.S - * - * Startup code for Motorola 68360 +/* + * head-ram.S - startup code for Motorola 68360 * * Copyright 2001 (C) SED Systems, a Division of Calian Ltd. * Based on: arch/m68knommu/platform/68328/pilot/crt0_rom.S diff --git a/arch/m68k/platform/68360/head-rom.S b/arch/m68k/68360/head-rom.S index dfc756d99886..b3a7e40f35e1 100644 --- a/arch/m68k/platform/68360/head-rom.S +++ b/arch/m68k/68360/head-rom.S @@ -1,6 +1,5 @@ -/* arch/m68knommu/platform/68360/head-rom.S - * - * Startup code for Motorola 68360 +/* + * head-rom.S - startup code for Motorola 68360 * * Copyright (C) SED Systems, a Division of Calian Ltd. * Based on: arch/m68knommu/platform/68328/pilot/crt0_rom.S diff --git a/arch/m68k/platform/68360/ints.c b/arch/m68k/68360/ints.c index 8cd42692331b..2360fc046681 100644 --- a/arch/m68k/platform/68360/ints.c +++ b/arch/m68k/68360/ints.c @@ -1,5 +1,5 @@ /* - * linux/arch/$(ARCH)/platform/$(PLATFORM)/ints.c + * ints.c - first level interrupt handlers * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 7f7830f2c5bc..0b29dcfef69f 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -92,9 +92,9 @@ endif # head-y := arch/m68k/kernel/head.o head-$(CONFIG_SUN3) := arch/m68k/kernel/sun3-head.o -head-$(CONFIG_M68360) := arch/m68k/platform/68360/head.o -head-$(CONFIG_M68000) := arch/m68k/platform/68000/head.o -head-$(CONFIG_COLDFIRE) := arch/m68k/platform/coldfire/head.o +head-$(CONFIG_M68360) := arch/m68k/68360/head.o +head-$(CONFIG_M68000) := arch/m68k/68000/head.o +head-$(CONFIG_COLDFIRE) := arch/m68k/coldfire/head.o core-y += arch/m68k/kernel/ arch/m68k/mm/ libs-y += arch/m68k/lib/ @@ -114,9 +114,9 @@ core-$(CONFIG_NATFEAT) += arch/m68k/emu/ core-$(CONFIG_M68040) += arch/m68k/fpsp040/ core-$(CONFIG_M68060) += arch/m68k/ifpsp060/ core-$(CONFIG_M68KFPU_EMU) += arch/m68k/math-emu/ -core-$(CONFIG_M68360) += arch/m68k/platform/68360/ -core-$(CONFIG_M68000) += arch/m68k/platform/68000/ -core-$(CONFIG_COLDFIRE) += arch/m68k/platform/coldfire/ +core-$(CONFIG_M68360) += arch/m68k/68360/ +core-$(CONFIG_M68000) += arch/m68k/68000/ +core-$(CONFIG_COLDFIRE) += arch/m68k/coldfire/ all: zImage diff --git a/arch/m68k/platform/coldfire/Makefile b/arch/m68k/coldfire/Makefile index 68f0fac60099..68f0fac60099 100644 --- a/arch/m68k/platform/coldfire/Makefile +++ b/arch/m68k/coldfire/Makefile diff --git a/arch/m68k/platform/coldfire/cache.c b/arch/m68k/coldfire/cache.c index 71beeaf0c5c4..71beeaf0c5c4 100644 --- a/arch/m68k/platform/coldfire/cache.c +++ b/arch/m68k/coldfire/cache.c diff --git a/arch/m68k/platform/coldfire/clk.c b/arch/m68k/coldfire/clk.c index fddfdccae63b..fddfdccae63b 100644 --- a/arch/m68k/platform/coldfire/clk.c +++ b/arch/m68k/coldfire/clk.c diff --git a/arch/m68k/platform/coldfire/device.c b/arch/m68k/coldfire/device.c index 71ea4c02795d..71ea4c02795d 100644 --- a/arch/m68k/platform/coldfire/device.c +++ b/arch/m68k/coldfire/device.c diff --git a/arch/m68k/platform/coldfire/dma.c b/arch/m68k/coldfire/dma.c index df5ce20d181c..df5ce20d181c 100644 --- a/arch/m68k/platform/coldfire/dma.c +++ b/arch/m68k/coldfire/dma.c diff --git a/arch/m68k/platform/coldfire/dma_timer.c b/arch/m68k/coldfire/dma_timer.c index 235ad57c4707..235ad57c4707 100644 --- a/arch/m68k/platform/coldfire/dma_timer.c +++ b/arch/m68k/coldfire/dma_timer.c diff --git a/arch/m68k/platform/coldfire/entry.S b/arch/m68k/coldfire/entry.S index 881ab8e379d4..52d312d5b4d4 100644 --- a/arch/m68k/platform/coldfire/entry.S +++ b/arch/m68k/coldfire/entry.S @@ -1,5 +1,5 @@ /* - * linux/arch/m68knommu/platform/5307/entry.S + * entry.S -- interrupt and exception processing for ColdFire * * Copyright (C) 1999-2007, Greg Ungerer (gerg@snapgear.com) * Copyright (C) 1998 D. Jeff Dionne <jeff@lineo.ca>, diff --git a/arch/m68k/platform/coldfire/firebee.c b/arch/m68k/coldfire/firebee.c index 46d50534f981..46d50534f981 100644 --- a/arch/m68k/platform/coldfire/firebee.c +++ b/arch/m68k/coldfire/firebee.c diff --git a/arch/m68k/platform/coldfire/gpio.c b/arch/m68k/coldfire/gpio.c index e7e428681ec5..e7e428681ec5 100644 --- a/arch/m68k/platform/coldfire/gpio.c +++ b/arch/m68k/coldfire/gpio.c diff --git a/arch/m68k/platform/coldfire/head.S b/arch/m68k/coldfire/head.S index fa31be297b85..fa31be297b85 100644 --- a/arch/m68k/platform/coldfire/head.S +++ b/arch/m68k/coldfire/head.S diff --git a/arch/m68k/platform/coldfire/intc-2.c b/arch/m68k/coldfire/intc-2.c index 995093357c59..995093357c59 100644 --- a/arch/m68k/platform/coldfire/intc-2.c +++ b/arch/m68k/coldfire/intc-2.c diff --git a/arch/m68k/platform/coldfire/intc-5249.c b/arch/m68k/coldfire/intc-5249.c index b0d1641053e4..b0d1641053e4 100644 --- a/arch/m68k/platform/coldfire/intc-5249.c +++ b/arch/m68k/coldfire/intc-5249.c diff --git a/arch/m68k/platform/coldfire/intc-525x.c b/arch/m68k/coldfire/intc-525x.c index b23204d059ac..b23204d059ac 100644 --- a/arch/m68k/platform/coldfire/intc-525x.c +++ b/arch/m68k/coldfire/intc-525x.c diff --git a/arch/m68k/platform/coldfire/intc-5272.c b/arch/m68k/coldfire/intc-5272.c index d7b695629a7e..d1e2fbad327c 100644 --- a/arch/m68k/platform/coldfire/intc-5272.c +++ b/arch/m68k/coldfire/intc-5272.c @@ -36,7 +36,7 @@ * they also need acknowledging via acknowledge bits. */ struct irqmap { - unsigned char icr; + unsigned int icr; unsigned char index; unsigned char ack; }; diff --git a/arch/m68k/platform/coldfire/intc-simr.c b/arch/m68k/coldfire/intc-simr.c index 7cf2c156f72d..7cf2c156f72d 100644 --- a/arch/m68k/platform/coldfire/intc-simr.c +++ b/arch/m68k/coldfire/intc-simr.c diff --git a/arch/m68k/platform/coldfire/intc.c b/arch/m68k/coldfire/intc.c index cce257420388..cce257420388 100644 --- a/arch/m68k/platform/coldfire/intc.c +++ b/arch/m68k/coldfire/intc.c diff --git a/arch/m68k/platform/coldfire/m5206.c b/arch/m68k/coldfire/m5206.c index 0e55f449a88c..8945f5e7b39c 100644 --- a/arch/m68k/platform/coldfire/m5206.c +++ b/arch/m68k/coldfire/m5206.c @@ -1,7 +1,7 @@ /***************************************************************************/ /* - * linux/arch/m68knommu/platform/5206/config.c + * m5206.c -- platform support for ColdFire 5206 based boards * * Copyright (C) 1999-2002, Greg Ungerer (gerg@snapgear.com) * Copyright (C) 2000-2001, Lineo Inc. (www.lineo.com) diff --git a/arch/m68k/platform/coldfire/m520x.c b/arch/m68k/coldfire/m520x.c index 4040a3c93733..173834f251eb 100644 --- a/arch/m68k/platform/coldfire/m520x.c +++ b/arch/m68k/coldfire/m520x.c @@ -1,7 +1,7 @@ /***************************************************************************/ /* - * linux/arch/m68knommu/platform/520x/config.c + * m520x.c -- platform support for ColdFire 520x based boards * * Copyright (C) 2005, Freescale (www.freescale.com) * Copyright (C) 2005, Intec Automation (mike@steroidmicros.com) diff --git a/arch/m68k/platform/coldfire/m523x.c b/arch/m68k/coldfire/m523x.c index 6b7135e6d5b4..a191a467eff2 100644 --- a/arch/m68k/platform/coldfire/m523x.c +++ b/arch/m68k/coldfire/m523x.c @@ -1,7 +1,7 @@ /***************************************************************************/ /* - * linux/arch/m68knommu/platform/523x/config.c + * m523x.c -- platform support for ColdFire 523x based boards * * Sub-architcture dependent initialization code for the Freescale * 523x CPUs. diff --git a/arch/m68k/platform/coldfire/m5249.c b/arch/m68k/coldfire/m5249.c index f6253a3313b3..e48f55adc447 100644 --- a/arch/m68k/platform/coldfire/m5249.c +++ b/arch/m68k/coldfire/m5249.c @@ -1,7 +1,7 @@ /***************************************************************************/ /* - * linux/arch/m68knommu/platform/5249/config.c + * m5249.c -- platform support for ColdFire 5249 based boards * * Copyright (C) 2002, Greg Ungerer (gerg@snapgear.com) */ diff --git a/arch/m68k/platform/coldfire/m525x.c b/arch/m68k/coldfire/m525x.c index 1adba3909035..3d8583e2187c 100644 --- a/arch/m68k/platform/coldfire/m525x.c +++ b/arch/m68k/coldfire/m525x.c @@ -1,7 +1,7 @@ /***************************************************************************/ /* - * 525x.c + * 525x.c -- platform support for ColdFire 525x based boards * * Copyright (C) 2012, Steven King <sfking@fdwdc.com> */ diff --git a/arch/m68k/platform/coldfire/m5272.c b/arch/m68k/coldfire/m5272.c index 8a4d3cc322c6..b15219ed22bf 100644 --- a/arch/m68k/platform/coldfire/m5272.c +++ b/arch/m68k/coldfire/m5272.c @@ -1,7 +1,7 @@ /***************************************************************************/ /* - * linux/arch/m68knommu/platform/5272/config.c + * m5272.c -- platform support for ColdFire 5272 based boards * * Copyright (C) 1999-2002, Greg Ungerer (gerg@snapgear.com) * Copyright (C) 2001-2002, SnapGear Inc. (www.snapgear.com) diff --git a/arch/m68k/platform/coldfire/m527x.c b/arch/m68k/coldfire/m527x.c index 62d81ef016f1..2ba470735bed 100644 --- a/arch/m68k/platform/coldfire/m527x.c +++ b/arch/m68k/coldfire/m527x.c @@ -1,10 +1,10 @@ /***************************************************************************/ /* - * linux/arch/m68knommu/platform/527x/config.c + * m527x.c -- platform support for ColdFire 527x based boards * * Sub-architcture dependent initialization code for the Freescale - * 5270/5271 CPUs. + * 5270/5271 and 5274/5275 CPUs. * * Copyright (C) 1999-2004, Greg Ungerer (gerg@snapgear.com) * Copyright (C) 2001-2004, SnapGear Inc. (www.snapgear.com) diff --git a/arch/m68k/platform/coldfire/m528x.c b/arch/m68k/coldfire/m528x.c index 21cd161d36f1..45e947aeade4 100644 --- a/arch/m68k/platform/coldfire/m528x.c +++ b/arch/m68k/coldfire/m528x.c @@ -1,7 +1,7 @@ /***************************************************************************/ /* - * linux/arch/m68knommu/platform/528x/config.c + * m528x.c -- platform support for ColdFire 528x based boards * * Sub-architcture dependent initialization code for the Freescale * 5280, 5281 and 5282 CPUs. diff --git a/arch/m68k/platform/coldfire/m5307.c b/arch/m68k/coldfire/m5307.c index 887435361386..2da1d146e344 100644 --- a/arch/m68k/platform/coldfire/m5307.c +++ b/arch/m68k/coldfire/m5307.c @@ -1,7 +1,7 @@ /***************************************************************************/ /* - * linux/arch/m68knommu/platform/5307/config.c + * m5307.c -- platform support for ColdFire 5307 based boards * * Copyright (C) 1999-2002, Greg Ungerer (gerg@snapgear.com) * Copyright (C) 2000, Lineo (www.lineo.com) diff --git a/arch/m68k/platform/coldfire/m53xx.c b/arch/m68k/coldfire/m53xx.c index 80879a7fe3d5..80879a7fe3d5 100644 --- a/arch/m68k/platform/coldfire/m53xx.c +++ b/arch/m68k/coldfire/m53xx.c diff --git a/arch/m68k/platform/coldfire/m5407.c b/arch/m68k/coldfire/m5407.c index 2fb3cdbfde30..738eba6be40e 100644 --- a/arch/m68k/platform/coldfire/m5407.c +++ b/arch/m68k/coldfire/m5407.c @@ -1,7 +1,7 @@ /***************************************************************************/ /* - * linux/arch/m68knommu/platform/5407/config.c + * m5407.c -- platform support for ColdFire 5407 based boards * * Copyright (C) 1999-2002, Greg Ungerer (gerg@snapgear.com) * Copyright (C) 2000, Lineo (www.lineo.com) diff --git a/arch/m68k/platform/coldfire/m5441x.c b/arch/m68k/coldfire/m5441x.c index 98a13cce93d8..98a13cce93d8 100644 --- a/arch/m68k/platform/coldfire/m5441x.c +++ b/arch/m68k/coldfire/m5441x.c diff --git a/arch/m68k/platform/coldfire/m54xx.c b/arch/m68k/coldfire/m54xx.c index 952da53aa0bc..075aaabd1360 100644 --- a/arch/m68k/platform/coldfire/m54xx.c +++ b/arch/m68k/coldfire/m54xx.c @@ -1,7 +1,7 @@ /***************************************************************************/ /* - * linux/arch/m68knommu/platform/54xx/config.c + * m54xx.c -- platform support for ColdFire 54xx based boards * * Copyright (C) 2010, Philippe De Muyter <phdm@macqel.be> */ @@ -23,7 +23,6 @@ #include <asm/mcfuart.h> #include <asm/mcfclk.h> #include <asm/m54xxgpt.h> -#include <asm/mcfclk.h> #ifdef CONFIG_MMU #include <asm/mmu_context.h> #endif diff --git a/arch/m68k/platform/coldfire/mcf8390.c b/arch/m68k/coldfire/mcf8390.c index 23a6874a3248..23a6874a3248 100644 --- a/arch/m68k/platform/coldfire/mcf8390.c +++ b/arch/m68k/coldfire/mcf8390.c diff --git a/arch/m68k/platform/coldfire/nettel.c b/arch/m68k/coldfire/nettel.c index ddc48ec1b800..ddc48ec1b800 100644 --- a/arch/m68k/platform/coldfire/nettel.c +++ b/arch/m68k/coldfire/nettel.c diff --git a/arch/m68k/platform/coldfire/pci.c b/arch/m68k/coldfire/pci.c index df9679238b6d..df9679238b6d 100644 --- a/arch/m68k/platform/coldfire/pci.c +++ b/arch/m68k/coldfire/pci.c diff --git a/arch/m68k/platform/coldfire/pit.c b/arch/m68k/coldfire/pit.c index 493b3111d4c1..493b3111d4c1 100644 --- a/arch/m68k/platform/coldfire/pit.c +++ b/arch/m68k/coldfire/pit.c diff --git a/arch/m68k/platform/coldfire/reset.c b/arch/m68k/coldfire/reset.c index f30952f0cbe6..f30952f0cbe6 100644 --- a/arch/m68k/platform/coldfire/reset.c +++ b/arch/m68k/coldfire/reset.c diff --git a/arch/m68k/platform/coldfire/sltimers.c b/arch/m68k/coldfire/sltimers.c index 831a08cf6f40..831a08cf6f40 100644 --- a/arch/m68k/platform/coldfire/sltimers.c +++ b/arch/m68k/coldfire/sltimers.c diff --git a/arch/m68k/platform/coldfire/timers.c b/arch/m68k/coldfire/timers.c index cd496a20fcc7..cd496a20fcc7 100644 --- a/arch/m68k/platform/coldfire/timers.c +++ b/arch/m68k/coldfire/timers.c diff --git a/arch/m68k/platform/coldfire/vectors.c b/arch/m68k/coldfire/vectors.c index a4dbdecbec7a..08923fe600e0 100644 --- a/arch/m68k/platform/coldfire/vectors.c +++ b/arch/m68k/coldfire/vectors.c @@ -1,7 +1,7 @@ /***************************************************************************/ /* - * linux/arch/m68knommu/platform/coldfire/vectors.c + * vectors.c -- high level trap setup for ColdFire * * Copyright (C) 1999-2007, Greg Ungerer <gerg@snapgear.com> */ diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h index 52f7e8499172..be4b5a813ad4 100644 --- a/arch/m68k/include/asm/io_no.h +++ b/arch/m68k/include/asm/io_no.h @@ -179,6 +179,15 @@ static inline void *ioremap_fullcache(unsigned long physaddr, unsigned long size */ #define xlate_dev_kmem_ptr(p) p +static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return (void __iomem *) port; +} + +static inline void ioport_unmap(void __iomem *p) +{ +} + #endif /* __KERNEL__ */ #endif /* _M68KNOMMU_IO_H */ diff --git a/arch/m68k/include/asm/m54xxpci.h b/arch/m68k/include/asm/m54xxpci.h index 6fbf54f72f2e..4687f5aa3741 100644 --- a/arch/m68k/include/asm/m54xxpci.h +++ b/arch/m68k/include/asm/m54xxpci.h @@ -72,7 +72,7 @@ #define PCIRFWPR (CONFIG_MBAR + 0x84d4) /* RX FIFO write pointer */ #define PACR (CONFIG_MBAR + 0xc00) /* PCI arbiter control */ -#define PASR (COFNIG_MBAR + 0xc04) /* PCI arbiter status */ +#define PASR (CONFIG_MBAR + 0xc04) /* PCI arbiter status */ /* * Definitions for the Global status and control register. diff --git a/arch/m68k/platform/Makefile b/arch/m68k/platform/Makefile deleted file mode 100644 index fc932bf65d34..000000000000 --- a/arch/m68k/platform/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -# -# Makefile for the arch/m68knommu/platform. -# diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 645b3c4fcfba..f7aac5b57b4b 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -770,7 +770,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) long ret = 0; user_exit(); - if (secure_computing(syscall) == -1) + if (secure_computing() == -1) return -1; if (test_thread_flag(TIF_SYSCALL_TRACE) && diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 296391395b95..f2cf1f90295b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -58,6 +58,9 @@ config NO_IOPORT_MAP config PCI_QUIRKS def_bool n +config ARCH_SUPPORTS_UPROBES + def_bool 64BIT + config S390 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE @@ -97,6 +100,7 @@ config S390 select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS2 + select DYNAMIC_FTRACE if FUNCTION_TRACER select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES if !SMP select GENERIC_FIND_FIRST_BIT @@ -113,10 +117,11 @@ config S390 select HAVE_CMPXCHG_LOCAL select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK - select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE if 64BIT + select HAVE_DYNAMIC_FTRACE_WITH_REGS if 64BIT select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_GRAPH_TRACER if 64BIT + select HAVE_FUNCTION_TRACER if 64BIT select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 874e6d6e9c5f..878e67973151 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -35,13 +35,16 @@ endif export LD_BFD -cflags-$(CONFIG_MARCH_G5) += -march=g5 -cflags-$(CONFIG_MARCH_Z900) += -march=z900 -cflags-$(CONFIG_MARCH_Z990) += -march=z990 -cflags-$(CONFIG_MARCH_Z9_109) += -march=z9-109 -cflags-$(CONFIG_MARCH_Z10) += -march=z10 -cflags-$(CONFIG_MARCH_Z196) += -march=z196 -cflags-$(CONFIG_MARCH_ZEC12) += -march=zEC12 +mflags-$(CONFIG_MARCH_G5) := -march=g5 +mflags-$(CONFIG_MARCH_Z900) := -march=z900 +mflags-$(CONFIG_MARCH_Z990) := -march=z990 +mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109 +mflags-$(CONFIG_MARCH_Z10) := -march=z10 +mflags-$(CONFIG_MARCH_Z196) := -march=z196 +mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12 + +aflags-y += $(mflags-y) +cflags-y += $(mflags-y) cflags-$(CONFIG_MARCH_G5_TUNE) += -mtune=g5 cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900 diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 19ff956b752b..b5dce6544d76 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -15,11 +15,13 @@ #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES /* Fast-BCR without checkpoint synchronization */ -#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0) +#define __ASM_BARRIER "bcr 14,0\n" #else -#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0) +#define __ASM_BARRIER "bcr 15,0\n" #endif +#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0) + #define rmb() mb() #define wmb() mb() #define read_barrier_depends() do { } while(0) diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 3001887f94b7..f8c196984853 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,8 +8,6 @@ #define _S390_CPUTIME_H #include <linux/types.h> -#include <linux/percpu.h> -#include <linux/spinlock.h> #include <asm/div64.h> @@ -167,28 +165,8 @@ static inline clock_t cputime64_to_clock_t(cputime64_t cputime) return clock; } -struct s390_idle_data { - int nohz_delay; - unsigned int sequence; - unsigned long long idle_count; - unsigned long long idle_time; - unsigned long long clock_idle_enter; - unsigned long long clock_idle_exit; - unsigned long long timer_idle_enter; - unsigned long long timer_idle_exit; -}; +cputime64_t arch_cpu_idle_time(int cpu); -DECLARE_PER_CPU(struct s390_idle_data, s390_idle); - -cputime64_t s390_get_idle_time(int cpu); - -#define arch_idle_time(cpu) s390_get_idle_time(cpu) - -static inline int s390_nohz_delay(int cpu) -{ - return __get_cpu_var(s390_idle).nohz_delay != 0; -} - -#define arch_needs_cpu(cpu) s390_nohz_delay(cpu) +#define arch_idle_time(cpu) arch_cpu_idle_time(cpu) #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h index 04a83f5773cd..60323c21938b 100644 --- a/arch/s390/include/asm/dis.h +++ b/arch/s390/include/asm/dis.h @@ -13,12 +13,13 @@ #define OPERAND_FPR 0x2 /* Operand printed as %fx */ #define OPERAND_AR 0x4 /* Operand printed as %ax */ #define OPERAND_CR 0x8 /* Operand printed as %cx */ -#define OPERAND_DISP 0x10 /* Operand printed as displacement */ -#define OPERAND_BASE 0x20 /* Operand printed as base register */ -#define OPERAND_INDEX 0x40 /* Operand printed as index register */ -#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */ -#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */ -#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */ +#define OPERAND_VR 0x10 /* Operand printed as %vx */ +#define OPERAND_DISP 0x20 /* Operand printed as displacement */ +#define OPERAND_BASE 0x40 /* Operand printed as base register */ +#define OPERAND_INDEX 0x80 /* Operand printed as index register */ +#define OPERAND_PCREL 0x100 /* Operand printed as pc-relative symbol */ +#define OPERAND_SIGNED 0x200 /* Operand printed as signed value */ +#define OPERAND_LENGTH 0x400 /* Operand printed as length (+1) */ struct s390_operand { diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 78f4f8711d58..f6e43d39e3d8 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -102,6 +102,7 @@ #define HWCAP_S390_ETF3EH 256 #define HWCAP_S390_HIGH_GPRS 512 #define HWCAP_S390_TE 1024 +#define HWCAP_S390_VXRS 2048 /* * These are used to set parameters in the core dumps. @@ -225,6 +226,6 @@ int arch_setup_additional_pages(struct linux_binprm *, int); extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk -void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs); #endif diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index bf246dae1367..3aef8afec336 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ extern void _mcount(void); +extern char ftrace_graph_caller_end; struct dyn_arch_ftrace { }; @@ -17,10 +18,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) #endif /* __ASSEMBLY__ */ -#ifdef CONFIG_64BIT -#define MCOUNT_INSN_SIZE 12 -#else -#define MCOUNT_INSN_SIZE 22 -#endif +#define MCOUNT_INSN_SIZE 18 + +#define ARCH_SUPPORTS_FTRACE_OPS 1 #endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h new file mode 100644 index 000000000000..6af037f574b8 --- /dev/null +++ b/arch/s390/include/asm/idle.h @@ -0,0 +1,26 @@ +/* + * Copyright IBM Corp. 2014 + * + * Author: Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _S390_IDLE_H +#define _S390_IDLE_H + +#include <linux/types.h> +#include <linux/device.h> + +struct s390_idle_data { + unsigned int sequence; + unsigned long long idle_count; + unsigned long long idle_time; + unsigned long long clock_idle_enter; + unsigned long long clock_idle_exit; + unsigned long long timer_idle_enter; + unsigned long long timer_idle_exit; +}; + +extern struct device_attribute dev_attr_idle_count; +extern struct device_attribute dev_attr_idle_time_us; + +#endif /* _S390_IDLE_H */ diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index c81661e756a0..ece606c2ee86 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -89,12 +89,12 @@ extern u32 ipl_flags; extern u32 dump_prefix_page; struct dump_save_areas { - struct save_area **areas; + struct save_area_ext **areas; int count; }; extern struct dump_save_areas dump_save_areas; -struct save_area *dump_save_area_create(int cpu); +struct save_area_ext *dump_save_area_create(int cpu); extern void do_reipl(void); extern void do_halt(void); diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index c4dd400a2791..e787cc1bff8f 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -51,6 +51,7 @@ enum interruption_class { IRQEXT_CMS, IRQEXT_CMC, IRQEXT_CMR, + IRQEXT_FTP, IRQIO_CIO, IRQIO_QAI, IRQIO_DAS, diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 4176dfe0fba1..98629173ce3b 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -84,6 +84,10 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr); int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); +int probe_is_prohibited_opcode(u16 *insn); +int probe_get_fixup_type(u16 *insn); +int probe_is_insn_relative_long(u16 *insn); + #define flush_insn_slot(p) do { } while (0) #endif /* _ASM_S390_KPROBES_H */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 4349197ab9df..6cc51fe84410 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -11,6 +11,7 @@ #include <linux/types.h> #include <asm/ptrace.h> #include <asm/cpu.h> +#include <asm/types.h> #ifdef CONFIG_32BIT @@ -31,6 +32,11 @@ struct save_area { u32 ctrl_regs[16]; } __packed; +struct save_area_ext { + struct save_area sa; + __vector128 vx_regs[32]; +}; + struct _lowcore { psw_t restart_psw; /* 0x0000 */ psw_t restart_old_psw; /* 0x0008 */ @@ -183,6 +189,11 @@ struct save_area { u64 ctrl_regs[16]; } __packed; +struct save_area_ext { + struct save_area sa; + __vector128 vx_regs[32]; +}; + struct _lowcore { __u8 pad_0x0000[0x0014-0x0000]; /* 0x0000 */ __u32 ipl_parmblock_ptr; /* 0x0014 */ @@ -310,7 +321,10 @@ struct _lowcore { /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ - __u8 pad_0x1000[0x11b8-0x1000]; /* 0x1000 */ + __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ + + /* Pointer to vector register save area */ + __u64 vector_save_area_addr; /* 0x11b0 */ /* 64 bit extparam used for pfault/diag 250: defined by architecture */ __u64 ext_params2; /* 0x11B8 */ @@ -334,9 +348,10 @@ struct _lowcore { /* Transaction abort diagnostic block */ __u8 pgm_tdb[256]; /* 0x1800 */ + __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */ - /* align to the top of the prefix area */ - __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ + /* Software defined save area for vector registers */ + __u8 vector_save_area[1024]; /* 0x1c00 */ } __packed; #endif /* CONFIG_32BIT */ diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 35f8ec185616..3027a5a72b74 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -38,7 +38,7 @@ struct mci { __u32 pm : 1; /* 22 psw program mask and cc validity */ __u32 ia : 1; /* 23 psw instruction address validity */ __u32 fa : 1; /* 24 failing storage address validity */ - __u32 : 1; /* 25 */ + __u32 vr : 1; /* 25 vector register validity */ __u32 ec : 1; /* 26 external damage code validity */ __u32 fp : 1; /* 27 floating point register validity */ __u32 gr : 1; /* 28 general register validity */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b7054356cc98..57c882761dea 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -217,7 +217,6 @@ extern unsigned long MODULES_END; */ /* Hardware bits in the page table entry */ -#define _PAGE_CO 0x100 /* HW Change-bit override */ #define _PAGE_PROTECT 0x200 /* HW read-only bit */ #define _PAGE_INVALID 0x400 /* HW invalid bit */ #define _PAGE_LARGE 0x800 /* Bit to mark a large pte */ @@ -234,8 +233,8 @@ extern unsigned long MODULES_END; #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \ - _PAGE_DIRTY | _PAGE_YOUNG) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \ + _PAGE_YOUNG) /* * handle_pte_fault uses pte_present, pte_none and pte_file to find out the @@ -354,7 +353,6 @@ extern unsigned long MODULES_END; #define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ #define _REGION3_ENTRY_RO 0x200 /* page protection bit */ -#define _REGION3_ENTRY_CO 0x100 /* change-recording override */ /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL @@ -371,7 +369,6 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ #define _SEGMENT_ENTRY_SPLIT 0x0800 /* THP splitting bit */ #define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ -#define _SEGMENT_ENTRY_CO 0x0100 /* change-recording override */ #define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */ #define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */ @@ -873,8 +870,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); } else { - if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1) - pte_val(entry) |= _PAGE_CO; *ptep = entry; } } @@ -1044,6 +1039,22 @@ static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep) : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); } +static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep) +{ + unsigned long pto = (unsigned long) ptep; + +#ifndef CONFIG_64BIT + /* pto in ESA mode must point to the start of the segment table */ + pto &= 0x7ffffc00; +#endif + /* Invalidate a range of ptes + global TLB flush of the ptes */ + do { + asm volatile( + " .insn rrf,0xb2210000,%2,%0,%1,0" + : "+a" (address), "+a" (nr) : "a" (pto) : "memory"); + } while (nr != 255); +} + static inline void ptep_flush_direct(struct mm_struct *mm, unsigned long address, pte_t *ptep) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index e568fc8a7250..d559bdb03d18 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -13,9 +13,11 @@ #define CIF_MCCK_PENDING 0 /* machine check handling is pending */ #define CIF_ASCE 1 /* user asce needs fixup / uaccess */ +#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING) #define _CIF_ASCE (1<<CIF_ASCE) +#define _CIF_NOHZ_DELAY (1<<CIF_NOHZ_DELAY) #ifndef __ASSEMBLY__ @@ -43,6 +45,8 @@ static inline int test_cpu_flag(int flag) return !!(S390_lowcore.cpu_flags & (1U << flag)); } +#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) + /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -113,6 +117,7 @@ struct thread_struct { int ri_signum; #ifdef CONFIG_64BIT unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ + __vector128 *vxrs; /* Vector register save area */ #endif }; @@ -285,7 +290,12 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) return (psw.addr - ilc) & mask; #endif } - + +/* + * Function to stop a processor until the next interrupt occurs + */ +void enabled_wait(void); + /* * Function to drop a processor into disabled wait state */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 55d69dd7473c..be317feff7ac 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -161,6 +161,12 @@ static inline long regs_return_value(struct pt_regs *regs) return regs->gprs[2]; } +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->psw.addr = val | PSW_ADDR_AMODE; +} + int regs_query_register_offset(const char *name); const char *regs_query_register_name(unsigned int offset); unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 089a49814c50..7736fdd72595 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -55,8 +55,8 @@ extern void detect_memory_memblock(void); #define MACHINE_FLAG_LPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) #define MACHINE_FLAG_TE (1UL << 15) -#define MACHINE_FLAG_RRBM (1UL << 16) #define MACHINE_FLAG_TLB_LC (1UL << 17) +#define MACHINE_FLAG_VX (1UL << 18) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -78,8 +78,8 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_LPP (0) #define MACHINE_HAS_TOPOLOGY (0) #define MACHINE_HAS_TE (0) -#define MACHINE_HAS_RRBM (0) #define MACHINE_HAS_TLB_LC (0) +#define MACHINE_HAS_VX (0) #else /* CONFIG_64BIT */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -91,8 +91,8 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_LPP (S390_lowcore.machine_flags & MACHINE_FLAG_LPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) -#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) +#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) #endif /* CONFIG_64BIT */ /* diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index bf9c823d4020..49576115dbb7 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -15,6 +15,7 @@ #define SIGP_SET_ARCHITECTURE 18 #define SIGP_COND_EMERGENCY_SIGNAL 19 #define SIGP_SENSE_RUNNING 21 +#define SIGP_STORE_ADDITIONAL_STATUS 23 /* SIGP condition codes */ #define SIGP_CC_ORDER_CODE_ACCEPTED 0 @@ -33,9 +34,10 @@ #ifndef __ASSEMBLY__ -static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) +static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, + u32 *status) { - register unsigned int reg1 asm ("1") = parm; + register unsigned long reg1 asm ("1") = parm; int cc; asm volatile( diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 4f1307962a95..762d4f88af5a 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -29,7 +29,6 @@ extern int smp_find_processor_id(u16 address); extern int smp_store_status(int cpu); extern int smp_vcpu_scheduled(int cpu); extern void smp_yield_cpu(int cpu); -extern void smp_yield(void); extern void smp_cpu_set_polarization(int cpu, int val); extern int smp_cpu_get_polarization(int cpu); extern void smp_fill_possible_mask(void); @@ -50,7 +49,6 @@ static inline int smp_find_processor_id(u16 address) { return 0; } static inline int smp_store_status(int cpu) { return 0; } static inline int smp_vcpu_scheduled(int cpu) { return 1; } static inline void smp_yield_cpu(int cpu) { } -static inline void smp_yield(void) { } static inline void smp_fill_possible_mask(void) { } #endif /* CONFIG_SMP */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 96879f7ad6da..d6bdf906caa5 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -37,11 +37,17 @@ _raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new) * (the type definitions are in asm/spinlock_types.h) */ +void arch_lock_relax(unsigned int cpu); + void arch_spin_lock_wait(arch_spinlock_t *); int arch_spin_trylock_retry(arch_spinlock_t *); -void arch_spin_relax(arch_spinlock_t *); void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); +static inline void arch_spin_relax(arch_spinlock_t *lock) +{ + arch_lock_relax(lock->lock); +} + static inline u32 arch_spin_lockval(int cpu) { return ~cpu; @@ -64,11 +70,6 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp) _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL)); } -static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp) -{ - return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0); -} - static inline void arch_spin_lock(arch_spinlock_t *lp) { if (!arch_spin_trylock_once(lp)) @@ -91,7 +92,13 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp) static inline void arch_spin_unlock(arch_spinlock_t *lp) { - arch_spin_tryrelease_once(lp); + typecheck(unsigned int, lp->lock); + asm volatile( + __ASM_BARRIER + "st %1,%0\n" + : "+Q" (lp->lock) + : "d" (0) + : "cc", "memory"); } static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) @@ -123,13 +130,12 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) */ #define arch_write_can_lock(x) ((x)->lock == 0) -extern void _raw_read_lock_wait(arch_rwlock_t *lp); -extern void _raw_read_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); extern int _raw_read_trylock_retry(arch_rwlock_t *lp); -extern void _raw_write_lock_wait(arch_rwlock_t *lp); -extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); extern int _raw_write_trylock_retry(arch_rwlock_t *lp); +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + static inline int arch_read_trylock_once(arch_rwlock_t *rw) { unsigned int old = ACCESS_ONCE(rw->lock); @@ -144,16 +150,82 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw) _raw_compare_and_swap(&rw->lock, 0, 0x80000000)); } +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + +#define __RAW_OP_OR "lao" +#define __RAW_OP_AND "lan" +#define __RAW_OP_ADD "laa" + +#define __RAW_LOCK(ptr, op_val, op_string) \ +({ \ + unsigned int old_val; \ + \ + typecheck(unsigned int *, ptr); \ + asm volatile( \ + op_string " %0,%2,%1\n" \ + "bcr 14,0\n" \ + : "=d" (old_val), "+Q" (*ptr) \ + : "d" (op_val) \ + : "cc", "memory"); \ + old_val; \ +}) + +#define __RAW_UNLOCK(ptr, op_val, op_string) \ +({ \ + unsigned int old_val; \ + \ + typecheck(unsigned int *, ptr); \ + asm volatile( \ + "bcr 14,0\n" \ + op_string " %0,%2,%1\n" \ + : "=d" (old_val), "+Q" (*ptr) \ + : "d" (op_val) \ + : "cc", "memory"); \ + old_val; \ +}) + +extern void _raw_read_lock_wait(arch_rwlock_t *lp); +extern void _raw_write_lock_wait(arch_rwlock_t *lp, unsigned int prev); + static inline void arch_read_lock(arch_rwlock_t *rw) { - if (!arch_read_trylock_once(rw)) + unsigned int old; + + old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD); + if ((int) old < 0) _raw_read_lock_wait(rw); } -static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags) +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + __RAW_UNLOCK(&rw->lock, -1, __RAW_OP_ADD); +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + unsigned int old; + + old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); + if (old != 0) + _raw_write_lock_wait(rw, old); + rw->owner = SPINLOCK_LOCKVAL; +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + rw->owner = 0; + __RAW_UNLOCK(&rw->lock, 0x7fffffff, __RAW_OP_AND); +} + +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +extern void _raw_read_lock_wait(arch_rwlock_t *lp); +extern void _raw_write_lock_wait(arch_rwlock_t *lp); + +static inline void arch_read_lock(arch_rwlock_t *rw) { if (!arch_read_trylock_once(rw)) - _raw_read_lock_wait_flags(rw, flags); + _raw_read_lock_wait(rw); } static inline void arch_read_unlock(arch_rwlock_t *rw) @@ -169,19 +241,24 @@ static inline void arch_write_lock(arch_rwlock_t *rw) { if (!arch_write_trylock_once(rw)) _raw_write_lock_wait(rw); -} - -static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) -{ - if (!arch_write_trylock_once(rw)) - _raw_write_lock_wait_flags(rw, flags); + rw->owner = SPINLOCK_LOCKVAL; } static inline void arch_write_unlock(arch_rwlock_t *rw) { - _raw_compare_and_swap(&rw->lock, 0x80000000, 0); + typecheck(unsigned int, rw->lock); + + rw->owner = 0; + asm volatile( + __ASM_BARRIER + "st %1,%0\n" + : "+Q" (rw->lock) + : "d" (0) + : "cc", "memory"); } +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + static inline int arch_read_trylock(arch_rwlock_t *rw) { if (!arch_read_trylock_once(rw)) @@ -191,12 +268,20 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) static inline int arch_write_trylock(arch_rwlock_t *rw) { - if (!arch_write_trylock_once(rw)) - return _raw_write_trylock_retry(rw); + if (!arch_write_trylock_once(rw) && !_raw_write_trylock_retry(rw)) + return 0; + rw->owner = SPINLOCK_LOCKVAL; return 1; } -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() +static inline void arch_read_relax(arch_rwlock_t *rw) +{ + arch_lock_relax(rw->owner); +} + +static inline void arch_write_relax(arch_rwlock_t *rw) +{ + arch_lock_relax(rw->owner); +} #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index b2cd6ff7c2c5..d84b6939237c 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -13,6 +13,7 @@ typedef struct { typedef struct { unsigned int lock; + unsigned int owner; } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED { 0 } diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 18ea9e3f8142..2542a7e4c8b4 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -103,6 +103,61 @@ static inline void restore_fp_regs(freg_t *fprs) asm volatile("ld 15,%0" : : "Q" (fprs[15])); } +static inline void save_vx_regs(__vector128 *vxrs) +{ + typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; + + asm volatile( + " la 1,%0\n" + " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ + : "=Q" (*(addrtype *) vxrs) : : "1"); +} + +static inline void save_vx_regs_safe(__vector128 *vxrs) +{ + unsigned long cr0, flags; + + flags = arch_local_irq_save(); + __ctl_store(cr0, 0, 0); + __ctl_set_bit(0, 17); + __ctl_set_bit(0, 18); + save_vx_regs(vxrs); + __ctl_load(cr0, 0, 0); + arch_local_irq_restore(flags); +} + +static inline void restore_vx_regs(__vector128 *vxrs) +{ + typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; + + asm volatile( + " la 1,%0\n" + " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ + : : "Q" (*(addrtype *) vxrs) : "1"); +} + +static inline void save_fp_vx_regs(struct task_struct *task) +{ +#ifdef CONFIG_64BIT + if (task->thread.vxrs) + save_vx_regs(task->thread.vxrs); + else +#endif + save_fp_regs(task->thread.fp_regs.fprs); +} + +static inline void restore_fp_vx_regs(struct task_struct *task) +{ +#ifdef CONFIG_64BIT + if (task->thread.vxrs) + restore_vx_regs(task->thread.vxrs); + else +#endif + restore_fp_regs(task->thread.fp_regs.fprs); +} + static inline void save_access_regs(unsigned int *acrs) { typedef struct { int _[NUM_ACRS]; } acrstype; @@ -120,16 +175,16 @@ static inline void restore_access_regs(unsigned int *acrs) #define switch_to(prev,next,last) do { \ if (prev->mm) { \ save_fp_ctl(&prev->thread.fp_regs.fpc); \ - save_fp_regs(prev->thread.fp_regs.fprs); \ + save_fp_vx_regs(prev); \ save_access_regs(&prev->thread.acrs[0]); \ save_ri_cb(prev->thread.ri_cb); \ } \ if (next->mm) { \ + update_cr_regs(next); \ restore_fp_ctl(&next->thread.fp_regs.fpc); \ - restore_fp_regs(next->thread.fp_regs.fprs); \ + restore_fp_vx_regs(next); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ - update_cr_regs(next); \ } \ prev = __switch_to(prev,next); \ } while (0) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index b833e9c0bfbf..4d62fd5b56e5 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -84,11 +84,13 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ #define TIF_SECCOMP 5 /* secure computing */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ +#define TIF_UPROBE 7 /* breakpointed or single-stepping */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ #define TIF_SINGLE_STEP 19 /* This task is single stepped */ #define TIF_BLOCK_STEP 20 /* This task is block stepped */ +#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */ #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) @@ -97,6 +99,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1<<TIF_SECCOMP) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) +#define _TIF_UPROBE (1<<TIF_UPROBE) #define _TIF_31BIT (1<<TIF_31BIT) #define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) diff --git a/arch/s390/include/asm/uprobes.h b/arch/s390/include/asm/uprobes.h new file mode 100644 index 000000000000..1411dff7fea7 --- /dev/null +++ b/arch/s390/include/asm/uprobes.h @@ -0,0 +1,42 @@ +/* + * User-space Probes (UProbes) for s390 + * + * Copyright IBM Corp. 2014 + * Author(s): Jan Willeke, + */ + +#ifndef _ASM_UPROBES_H +#define _ASM_UPROBES_H + +#include <linux/notifier.h> + +typedef u16 uprobe_opcode_t; + +#define UPROBE_XOL_SLOT_BYTES 256 /* cache aligned */ + +#define UPROBE_SWBP_INSN 0x0002 +#define UPROBE_SWBP_INSN_SIZE 2 + +struct arch_uprobe { + union{ + uprobe_opcode_t insn[3]; + uprobe_opcode_t ixol[3]; + }; + unsigned int saved_per : 1; + unsigned int saved_int_code; +}; + +struct arch_uprobe_task { +}; + +int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, + unsigned long addr); +int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); +int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); +bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); +int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, + void *data); +void arch_uprobe_abort_xol(struct arch_uprobe *ap, struct pt_regs *regs); +unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline, + struct pt_regs *regs); +#endif /* _ASM_UPROBES_H */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index bc9746a7d47c..a62526d09201 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -22,13 +22,17 @@ struct vdso_data { __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */ __u64 xtime_clock_sec; /* Kernel time 0x10 */ __u64 xtime_clock_nsec; /* 0x18 */ - __u64 wtom_clock_sec; /* Wall to monotonic clock 0x20 */ - __u64 wtom_clock_nsec; /* 0x28 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ - __u32 tz_dsttime; /* Type of dst correction 0x34 */ - __u32 ectg_available; /* ECTG instruction present 0x38 */ - __u32 tk_mult; /* Mult. used for xtime_nsec 0x3c */ - __u32 tk_shift; /* Shift used for xtime_nsec 0x40 */ + __u64 xtime_coarse_sec; /* Coarse kernel time 0x20 */ + __u64 xtime_coarse_nsec; /* 0x28 */ + __u64 wtom_clock_sec; /* Wall to monotonic clock 0x30 */ + __u64 wtom_clock_nsec; /* 0x38 */ + __u64 wtom_coarse_sec; /* Coarse wall to monotonic 0x40 */ + __u64 wtom_coarse_nsec; /* 0x48 */ + __u32 tz_minuteswest; /* Minutes west of Greenwich 0x50 */ + __u32 tz_dsttime; /* Type of dst correction 0x54 */ + __u32 ectg_available; /* ECTG instruction present 0x58 */ + __u32 tk_mult; /* Mult. used for xtime_nsec 0x5c */ + __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ }; struct vdso_per_cpu_data { diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h index bfe25d513ad2..10a179af62d8 100644 --- a/arch/s390/include/asm/vtimer.h +++ b/arch/s390/include/asm/vtimer.h @@ -28,6 +28,4 @@ extern int del_virt_timer(struct vtimer_list *timer); extern void init_cpu_vtimer(void); extern void vtime_init(void); -extern void vtime_stop_cpu(void); - #endif /* _ASM_S390_TIMER_H */ diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h index b30de9c01bbe..5f0b8d7ddb0b 100644 --- a/arch/s390/include/uapi/asm/sigcontext.h +++ b/arch/s390/include/uapi/asm/sigcontext.h @@ -7,10 +7,14 @@ #define _ASM_S390_SIGCONTEXT_H #include <linux/compiler.h> +#include <linux/types.h> -#define __NUM_GPRS 16 -#define __NUM_FPRS 16 -#define __NUM_ACRS 16 +#define __NUM_GPRS 16 +#define __NUM_FPRS 16 +#define __NUM_ACRS 16 +#define __NUM_VXRS 32 +#define __NUM_VXRS_LOW 16 +#define __NUM_VXRS_HIGH 16 #ifndef __s390x__ @@ -59,6 +63,16 @@ typedef struct _s390_fp_regs fpregs; } _sigregs; +typedef struct +{ +#ifndef __s390x__ + unsigned long gprs_high[__NUM_GPRS]; +#endif + unsigned long long vxrs_low[__NUM_VXRS_LOW]; + __vector128 vxrs_high[__NUM_VXRS_HIGH]; + unsigned char __reserved[128]; +} _sigregs_ext; + struct sigcontext { unsigned long oldmask[_SIGCONTEXT_NSIG_WORDS]; diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h index 038f2b9178a4..3c3951e3415b 100644 --- a/arch/s390/include/uapi/asm/types.h +++ b/arch/s390/include/uapi/asm/types.h @@ -17,6 +17,10 @@ typedef unsigned long addr_t; typedef __signed__ long saddr_t; +typedef struct { + __u32 u[4]; +} __vector128; + #endif /* __ASSEMBLY__ */ #endif /* _UAPI_S390_TYPES_H */ diff --git a/arch/s390/include/uapi/asm/ucontext.h b/arch/s390/include/uapi/asm/ucontext.h index 3e077b2a4705..64a69aa5dde0 100644 --- a/arch/s390/include/uapi/asm/ucontext.h +++ b/arch/s390/include/uapi/asm/ucontext.h @@ -7,10 +7,15 @@ #ifndef _ASM_S390_UCONTEXT_H #define _ASM_S390_UCONTEXT_H -#define UC_EXTENDED 0x00000001 - -#ifndef __s390x__ +#define UC_GPRS_HIGH 1 /* uc_mcontext_ext has valid high gprs */ +#define UC_VXRS 2 /* uc_mcontext_ext has valid vector regs */ +/* + * The struct ucontext_extended describes how the registers are stored + * on a rt signal frame. Please note that the structure is not fixed, + * if new CPU registers are added to the user state the size of the + * struct ucontext_extended will increase. + */ struct ucontext_extended { unsigned long uc_flags; struct ucontext *uc_link; @@ -19,11 +24,9 @@ struct ucontext_extended { sigset_t uc_sigmask; /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */ unsigned char __unused[128 - sizeof(sigset_t)]; - unsigned long uc_gprs_high[16]; + _sigregs_ext uc_mcontext_ext; }; -#endif - struct ucontext { unsigned long uc_flags; struct ucontext *uc_link; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index a95c4ca99617..204c43a4c245 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -28,7 +28,7 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w -obj-y := traps.o time.o process.o base.o early.o setup.o vtime.o +obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o @@ -52,11 +52,9 @@ obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y) obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) -obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_UPROBES) += uprobes.o ifdef CONFIG_64BIT obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index afe1715a4eb7..ef279a136801 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -9,7 +9,7 @@ #include <linux/kbuild.h> #include <linux/kvm_host.h> #include <linux/sched.h> -#include <asm/cputime.h> +#include <asm/idle.h> #include <asm/vdso.h> #include <asm/pgtable.h> @@ -62,8 +62,12 @@ int main(void) DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp)); DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec)); DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); + DEFINE(__VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); + DEFINE(__VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); + DEFINE(__VDSO_WTOM_CRS_SEC, offsetof(struct vdso_data, wtom_coarse_sec)); + DEFINE(__VDSO_WTOM_CRS_NSEC, offsetof(struct vdso_data, wtom_coarse_nsec)); DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult)); @@ -73,8 +77,11 @@ int main(void) /* constants used by the vdso */ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); + DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); BLANK(); /* idle data offsets */ DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter)); diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 70d4b7c4beaa..a0a886c04977 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -50,6 +50,14 @@ typedef struct _s390_fp_regs32 fpregs; } _sigregs32; +typedef struct +{ + __u32 gprs_high[__NUM_GPRS]; + __u64 vxrs_low[__NUM_VXRS_LOW]; + __vector128 vxrs_high[__NUM_VXRS_HIGH]; + __u8 __reserved[128]; +} _sigregs_ext32; + #define _SIGCONTEXT_NSIG32 64 #define _SIGCONTEXT_NSIG_BPW32 32 #define __SIGNAL_FRAMESIZE32 96 @@ -72,6 +80,7 @@ struct ucontext32 { compat_sigset_t uc_sigmask; /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */ unsigned char __unused[128 - sizeof(compat_sigset_t)]; + _sigregs_ext32 uc_mcontext_ext; }; struct stat64_emu31; diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 598b0b42668b..009f5eb11125 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -36,17 +36,16 @@ typedef struct struct sigcontext32 sc; _sigregs32 sregs; int signo; - __u32 gprs_high[NUM_GPRS]; - __u8 retcode[S390_SYSCALL_SIZE]; + _sigregs_ext32 sregs_ext; + __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */ } sigframe32; typedef struct { __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; - __u8 retcode[S390_SYSCALL_SIZE]; + __u16 svc_insn; compat_siginfo_t info; struct ucontext32 uc; - __u32 gprs_high[NUM_GPRS]; } rt_sigframe32; int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) @@ -151,6 +150,38 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) return err ? -EFAULT : 0; } +/* Store registers needed to create the signal frame */ +static void store_sigregs(void) +{ + int i; + + save_access_regs(current->thread.acrs); + save_fp_ctl(¤t->thread.fp_regs.fpc); + if (current->thread.vxrs) { + save_vx_regs(current->thread.vxrs); + for (i = 0; i < __NUM_FPRS; i++) + current->thread.fp_regs.fprs[i] = + *(freg_t *)(current->thread.vxrs + i); + } else + save_fp_regs(current->thread.fp_regs.fprs); +} + +/* Load registers after signal return */ +static void load_sigregs(void) +{ + int i; + + restore_access_regs(current->thread.acrs); + /* restore_fp_ctl is done in restore_sigregs */ + if (current->thread.vxrs) { + for (i = 0; i < __NUM_FPRS; i++) + *(freg_t *)(current->thread.vxrs + i) = + current->thread.fp_regs.fprs[i]; + restore_vx_regs(current->thread.vxrs); + } else + restore_fp_regs(current->thread.fp_regs.fprs); +} + static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) { _sigregs32 user_sregs; @@ -163,11 +194,8 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) (__u32)(regs->psw.mask & PSW_MASK_BA); for (i = 0; i < NUM_GPRS; i++) user_sregs.regs.gprs[i] = (__u32) regs->gprs[i]; - save_access_regs(current->thread.acrs); memcpy(&user_sregs.regs.acrs, current->thread.acrs, sizeof(user_sregs.regs.acrs)); - save_fp_ctl(¤t->thread.fp_regs.fpc); - save_fp_regs(current->thread.fp_regs.fprs); memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, sizeof(user_sregs.fpregs)); if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32))) @@ -207,37 +235,67 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) regs->gprs[i] = (__u64) user_sregs.regs.gprs[i]; memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, sizeof(current->thread.acrs)); - restore_access_regs(current->thread.acrs); memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, sizeof(current->thread.fp_regs)); - restore_fp_regs(current->thread.fp_regs.fprs); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; } -static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) +static int save_sigregs_ext32(struct pt_regs *regs, + _sigregs_ext32 __user *sregs_ext) { __u32 gprs_high[NUM_GPRS]; + __u64 vxrs[__NUM_VXRS_LOW]; int i; + /* Save high gprs to signal stack */ for (i = 0; i < NUM_GPRS; i++) gprs_high[i] = regs->gprs[i] >> 32; - if (__copy_to_user(uregs, &gprs_high, sizeof(gprs_high))) + if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high, + sizeof(sregs_ext->gprs_high))) return -EFAULT; + + /* Save vector registers to signal stack */ + if (current->thread.vxrs) { + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1); + if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, + sizeof(sregs_ext->vxrs_low)) || + __copy_to_user(&sregs_ext->vxrs_high, + current->thread.vxrs + __NUM_VXRS_LOW, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + } return 0; } -static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) +static int restore_sigregs_ext32(struct pt_regs *regs, + _sigregs_ext32 __user *sregs_ext) { __u32 gprs_high[NUM_GPRS]; + __u64 vxrs[__NUM_VXRS_LOW]; int i; - if (__copy_from_user(&gprs_high, uregs, sizeof(gprs_high))) + /* Restore high gprs from signal stack */ + if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high, + sizeof(&sregs_ext->gprs_high))) return -EFAULT; for (i = 0; i < NUM_GPRS; i++) *(__u32 *)®s->gprs[i] = gprs_high[i]; + + /* Restore vector registers from signal stack */ + if (current->thread.vxrs) { + if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, + sizeof(sregs_ext->vxrs_low)) || + __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW, + &sregs_ext->vxrs_high, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + for (i = 0; i < __NUM_VXRS_LOW; i++) + *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i]; + } return 0; } @@ -252,8 +310,9 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) set_current_blocked(&set); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; - if (restore_sigregs_gprs_high(regs, frame->gprs_high)) + if (restore_sigregs_ext32(regs, &frame->sregs_ext)) goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: force_sig(SIGSEGV, current); @@ -269,12 +328,13 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; set_current_blocked(&set); + if (compat_restore_altstack(&frame->uc.uc_stack)) + goto badframe; if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) goto badframe; - if (restore_sigregs_gprs_high(regs, frame->gprs_high)) + if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) goto badframe; - if (compat_restore_altstack(&frame->uc.uc_stack)) - goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: force_sig(SIGSEGV, current); @@ -324,37 +384,64 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { int sig = ksig->sig; - sigframe32 __user *frame = get_sigframe(&ksig->ka, regs, sizeof(sigframe32)); - + sigframe32 __user *frame; + struct sigcontext32 sc; + unsigned long restorer; + size_t frame_size; + + /* + * gprs_high are always present for 31-bit compat tasks. + * The space for vector registers is only allocated if + * the machine supports it + */ + frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved); + if (!MACHINE_HAS_VX) + frame_size -= sizeof(frame->sregs_ext.vxrs_low) + + sizeof(frame->sregs_ext.vxrs_high); + frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; - if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32)) + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + return -EFAULT; + + /* Create struct sigcontext32 on the signal stack */ + memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32); + sc.sregs = (__u32)(unsigned long __force) &frame->sregs; + if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) return -EFAULT; + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create _sigregs32 on the signal stack */ if (save_sigregs32(regs, &frame->sregs)) return -EFAULT; - if (save_sigregs_gprs_high(regs, frame->gprs_high)) + + /* Place signal number on stack to allow backtrace from handler. */ + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) return -EFAULT; - if (__put_user((unsigned long) &frame->sregs, &frame->sc.sregs)) + + /* Create _sigregs_ext32 on the signal stack */ + if (save_sigregs_ext32(regs, &frame->sregs_ext)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64 __force) ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; + restorer = (unsigned long __force) + ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __force __user *)(frame->retcode))) + /* Signal frames without vectors registers are short ! */ + __u16 __user *svc = (void *) frame + frame_size - 2; + if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) return -EFAULT; + restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE; } - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) - return -EFAULT; - /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (__force __u64) frame; /* Force 31 bit amode and default user address space control. */ regs->psw.mask = PSW_MASK_BA | @@ -375,50 +462,69 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, regs->gprs[6] = task_thread_info(current)->last_break; } - /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) - return -EFAULT; return 0; } static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - int err = 0; - rt_sigframe32 __user *frame = get_sigframe(&ksig->ka, regs, sizeof(rt_sigframe32)); - + rt_sigframe32 __user *frame; + unsigned long restorer; + size_t frame_size; + u32 uc_flags; + + frame_size = sizeof(*frame) - + sizeof(frame->uc.uc_mcontext_ext.__reserved); + /* + * gprs_high are always present for 31-bit compat tasks. + * The space for vector registers is only allocated if + * the machine supports it + */ + uc_flags = UC_GPRS_HIGH; + if (MACHINE_HAS_VX) { + if (current->thread.vxrs) + uc_flags |= UC_VXRS; + } else + frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) + + sizeof(frame->uc.uc_mcontext_ext.vxrs_high); + frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; - if (copy_siginfo_to_user32(&frame->info, &ksig->info)) - return -EFAULT; - - /* Create the ucontext. */ - err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]); - err |= save_sigregs32(regs, &frame->uc.uc_mcontext); - err |= save_sigregs_gprs_high(regs, frame->gprs_high); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64 __force) ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; + restorer = (unsigned long __force) + ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __force __user *)(frame->retcode))) + __u16 __user *svc = &frame->svc_insn; + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) return -EFAULT; + restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE; } - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) + /* Create siginfo on the signal stack */ + if (copy_siginfo_to_user32(&frame->info, &ksig->info)) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create ucontext on the signal stack. */ + if (__put_user(uc_flags, &frame->uc.uc_flags) || + __put_user(0, &frame->uc.uc_link) || + __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || + save_sigregs32(regs, &frame->uc.uc_mcontext) || + __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (__force __u64) frame; /* Force 31 bit amode and default user address space control. */ regs->psw.mask = PSW_MASK_BA | diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index a3b9150e6802..9f73c8059022 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -46,9 +46,9 @@ struct dump_save_areas dump_save_areas; /* * Allocate and add a save area for a CPU */ -struct save_area *dump_save_area_create(int cpu) +struct save_area_ext *dump_save_area_create(int cpu) { - struct save_area **save_areas, *save_area; + struct save_area_ext **save_areas, *save_area; save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); if (!save_area) @@ -386,9 +386,45 @@ static void *nt_s390_prefix(void *ptr, struct save_area *sa) } /* + * Initialize vxrs high note (full 128 bit VX registers 16-31) + */ +static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs) +{ + return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16], + 16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize vxrs low note (lower halves of VX registers 0-15) + */ +static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs) +{ + Elf64_Nhdr *note; + u64 len; + int i; + + note = (Elf64_Nhdr *)ptr; + note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1; + note->n_descsz = 16 * 8; + note->n_type = NT_S390_VXRS_LOW; + len = sizeof(Elf64_Nhdr); + + memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + ptr += len; + /* Copy lower halves of SIMD registers 0-15 */ + for (i = 0; i < 16; i++) { + memcpy(ptr, &vx_regs[i], 8); + ptr += 8; + } + return ptr; +} + +/* * Fill ELF notes for one CPU with save area registers */ -void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vx_regs) { ptr = nt_prstatus(ptr, sa); ptr = nt_fpregset(ptr, sa); @@ -397,6 +433,10 @@ void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) ptr = nt_s390_tod_preg(ptr, sa); ptr = nt_s390_ctrs(ptr, sa); ptr = nt_s390_prefix(ptr, sa); + if (MACHINE_HAS_VX && vx_regs) { + ptr = nt_s390_vx_low(ptr, vx_regs); + ptr = nt_s390_vx_high(ptr, vx_regs); + } return ptr; } @@ -484,7 +524,7 @@ static int get_cpu_cnt(void) int i, cpus = 0; for (i = 0; i < dump_save_areas.count; i++) { - if (dump_save_areas.areas[i]->pref_reg == 0) + if (dump_save_areas.areas[i]->sa.pref_reg == 0) continue; cpus++; } @@ -530,17 +570,17 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) */ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) { - struct save_area *sa; + struct save_area_ext *sa_ext; void *ptr_start = ptr; int i; ptr = nt_prpsinfo(ptr); for (i = 0; i < dump_save_areas.count; i++) { - sa = dump_save_areas.areas[i]; - if (sa->pref_reg == 0) + sa_ext = dump_save_areas.areas[i]; + if (sa_ext->sa.pref_reg == 0) continue; - ptr = fill_cpu_elf_notes(ptr, sa); + ptr = fill_cpu_elf_notes(ptr, &sa_ext->sa, sa_ext->vx_regs); } ptr = nt_vmcoreinfo(ptr); memset(phdr, 0, sizeof(*phdr)); @@ -581,7 +621,7 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) mem_chunk_cnt = get_mem_chunk_cnt(); - alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + + alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 + mem_chunk_cnt * sizeof(Elf64_Phdr); hdr = kzalloc_panic(alloc_size); /* Init elf header */ diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 993efe6a887c..f3762937dd82 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -60,6 +60,11 @@ enum { A_28, /* Access reg. starting at position 28 */ C_8, /* Control reg. starting at position 8 */ C_12, /* Control reg. starting at position 12 */ + V_8, /* Vector reg. starting at position 8, extension bit at 36 */ + V_12, /* Vector reg. starting at position 12, extension bit at 37 */ + V_16, /* Vector reg. starting at position 16, extension bit at 38 */ + V_32, /* Vector reg. starting at position 32, extension bit at 39 */ + W_12, /* Vector reg. at bit 12, extension at bit 37, used as index */ B_16, /* Base register starting at position 16 */ B_32, /* Base register starting at position 32 */ X_12, /* Index register starting at position 12 */ @@ -82,6 +87,8 @@ enum { U8_24, /* 8 bit unsigned value starting at 24 */ U8_32, /* 8 bit unsigned value starting at 32 */ I8_8, /* 8 bit signed value starting at 8 */ + I8_16, /* 8 bit signed value starting at 16 */ + I8_24, /* 8 bit signed value starting at 24 */ I8_32, /* 8 bit signed value starting at 32 */ J12_12, /* PC relative offset at 12 */ I16_16, /* 16 bit signed value starting at 16 */ @@ -96,6 +103,9 @@ enum { U32_16, /* 32 bit unsigned value starting at 16 */ M_16, /* 4 bit optional mask starting at 16 */ M_20, /* 4 bit optional mask starting at 20 */ + M_24, /* 4 bit optional mask starting at 24 */ + M_28, /* 4 bit optional mask starting at 28 */ + M_32, /* 4 bit optional mask starting at 32 */ RO_28, /* optional GPR starting at position 28 */ }; @@ -130,7 +140,7 @@ enum { INSTR_RSY_RDRM, INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, INSTR_RS_RURD, - INSTR_RXE_FRRD, INSTR_RXE_RRRD, + INSTR_RXE_FRRD, INSTR_RXE_RRRD, INSTR_RXE_RRRDM, INSTR_RXF_FRRDF, INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD, INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD, @@ -143,6 +153,17 @@ enum { INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3, INSTR_S_00, INSTR_S_RD, + INSTR_VRI_V0IM, INSTR_VRI_V0I0, INSTR_VRI_V0IIM, INSTR_VRI_VVIM, + INSTR_VRI_VVV0IM, INSTR_VRI_VVV0I0, INSTR_VRI_VVIMM, + INSTR_VRR_VV00MMM, INSTR_VRR_VV000MM, INSTR_VRR_VV0000M, + INSTR_VRR_VV00000, INSTR_VRR_VVV0M0M, INSTR_VRR_VV00M0M, + INSTR_VRR_VVV000M, INSTR_VRR_VVV000V, INSTR_VRR_VVV0000, + INSTR_VRR_VVV0MMM, INSTR_VRR_VVV00MM, INSTR_VRR_VVVMM0V, + INSTR_VRR_VVVM0MV, INSTR_VRR_VVVM00V, INSTR_VRR_VRR0000, + INSTR_VRS_VVRDM, INSTR_VRS_VVRD0, INSTR_VRS_VRRDM, INSTR_VRS_VRRD0, + INSTR_VRS_RVRDM, + INSTR_VRV_VVRDM, INSTR_VRV_VWRDM, + INSTR_VRX_VRRDM, INSTR_VRX_VRRD0, }; static const struct s390_operand operands[] = @@ -168,6 +189,11 @@ static const struct s390_operand operands[] = [A_28] = { 4, 28, OPERAND_AR }, [C_8] = { 4, 8, OPERAND_CR }, [C_12] = { 4, 12, OPERAND_CR }, + [V_8] = { 4, 8, OPERAND_VR }, + [V_12] = { 4, 12, OPERAND_VR }, + [V_16] = { 4, 16, OPERAND_VR }, + [V_32] = { 4, 32, OPERAND_VR }, + [W_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR }, [B_16] = { 4, 16, OPERAND_BASE | OPERAND_GPR }, [B_32] = { 4, 32, OPERAND_BASE | OPERAND_GPR }, [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR }, @@ -190,6 +216,11 @@ static const struct s390_operand operands[] = [U8_24] = { 8, 24, 0 }, [U8_32] = { 8, 32, 0 }, [J12_12] = { 12, 12, OPERAND_PCREL }, + [I8_8] = { 8, 8, OPERAND_SIGNED }, + [I8_16] = { 8, 16, OPERAND_SIGNED }, + [I8_24] = { 8, 24, OPERAND_SIGNED }, + [I8_32] = { 8, 32, OPERAND_SIGNED }, + [I16_32] = { 16, 32, OPERAND_SIGNED }, [I16_16] = { 16, 16, OPERAND_SIGNED }, [U16_16] = { 16, 16, 0 }, [U16_32] = { 16, 32, 0 }, @@ -202,6 +233,9 @@ static const struct s390_operand operands[] = [U32_16] = { 32, 16, 0 }, [M_16] = { 4, 16, 0 }, [M_20] = { 4, 20, 0 }, + [M_24] = { 4, 24, 0 }, + [M_28] = { 4, 28, 0 }, + [M_32] = { 4, 32, 0 }, [RO_28] = { 4, 28, OPERAND_GPR } }; @@ -283,6 +317,7 @@ static const unsigned char formats[][7] = { [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, + [INSTR_RXE_RRRDM] = { 0xff, R_8,D_20,X_12,B_16,M_32,0 }, [INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 }, [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 }, [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 }, @@ -307,6 +342,37 @@ static const unsigned char formats[][7] = { [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 }, [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 }, [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, + [INSTR_VRI_V0IM] = { 0xff, V_8,I16_16,M_32,0,0,0 }, + [INSTR_VRI_V0I0] = { 0xff, V_8,I16_16,0,0,0,0 }, + [INSTR_VRI_V0IIM] = { 0xff, V_8,I8_16,I8_24,M_32,0,0 }, + [INSTR_VRI_VVIM] = { 0xff, V_8,I16_16,V_12,M_32,0,0 }, + [INSTR_VRI_VVV0IM]= { 0xff, V_8,V_12,V_16,I8_24,M_32,0 }, + [INSTR_VRI_VVV0I0]= { 0xff, V_8,V_12,V_16,I8_24,0,0 }, + [INSTR_VRI_VVIMM] = { 0xff, V_8,V_12,I16_16,M_32,M_28,0 }, + [INSTR_VRR_VV00MMM]={ 0xff, V_8,V_12,M_32,M_28,M_24,0 }, + [INSTR_VRR_VV000MM]={ 0xff, V_8,V_12,M_32,M_28,0,0 }, + [INSTR_VRR_VV0000M]={ 0xff, V_8,V_12,M_32,0,0,0 }, + [INSTR_VRR_VV00000]={ 0xff, V_8,V_12,0,0,0,0 }, + [INSTR_VRR_VVV0M0M]={ 0xff, V_8,V_12,V_16,M_32,M_24,0 }, + [INSTR_VRR_VV00M0M]={ 0xff, V_8,V_12,M_32,M_24,0,0 }, + [INSTR_VRR_VVV000M]={ 0xff, V_8,V_12,V_16,M_32,0,0 }, + [INSTR_VRR_VVV000V]={ 0xff, V_8,V_12,V_16,V_32,0,0 }, + [INSTR_VRR_VVV0000]={ 0xff, V_8,V_12,V_16,0,0,0 }, + [INSTR_VRR_VVV0MMM]={ 0xff, V_8,V_12,V_16,M_32,M_28,M_24 }, + [INSTR_VRR_VVV00MM]={ 0xff, V_8,V_12,V_16,M_32,M_28,0 }, + [INSTR_VRR_VVVMM0V]={ 0xff, V_8,V_12,V_16,V_32,M_20,M_24 }, + [INSTR_VRR_VVVM0MV]={ 0xff, V_8,V_12,V_16,V_32,M_28,M_20 }, + [INSTR_VRR_VVVM00V]={ 0xff, V_8,V_12,V_16,V_32,M_20,0 }, + [INSTR_VRR_VRR0000]={ 0xff, V_8,R_12,R_16,0,0,0 }, + [INSTR_VRS_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 }, + [INSTR_VRS_VVRD0] = { 0xff, V_8,V_12,D_20,B_16,0,0 }, + [INSTR_VRS_VRRDM] = { 0xff, V_8,R_12,D_20,B_16,M_32,0 }, + [INSTR_VRS_VRRD0] = { 0xff, V_8,R_12,D_20,B_16,0,0 }, + [INSTR_VRS_RVRDM] = { 0xff, R_8,V_12,D_20,B_16,M_32,0 }, + [INSTR_VRV_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 }, + [INSTR_VRV_VWRDM] = { 0xff, V_8,D_20,W_12,B_16,M_32,0 }, + [INSTR_VRX_VRRDM] = { 0xff, V_8,D_20,X_12,B_16,M_32,0 }, + [INSTR_VRX_VRRD0] = { 0xff, V_8,D_20,X_12,B_16,0,0 }, }; enum { @@ -381,6 +447,11 @@ enum { LONG_INSN_MPCIFC, LONG_INSN_STPCIFC, LONG_INSN_PCISTB, + LONG_INSN_VPOPCT, + LONG_INSN_VERLLV, + LONG_INSN_VESRAV, + LONG_INSN_VESRLV, + LONG_INSN_VSBCBI }; static char *long_insn_name[] = { @@ -455,6 +526,11 @@ static char *long_insn_name[] = { [LONG_INSN_MPCIFC] = "mpcifc", [LONG_INSN_STPCIFC] = "stpcifc", [LONG_INSN_PCISTB] = "pcistb", + [LONG_INSN_VPOPCT] = "vpopct", + [LONG_INSN_VERLLV] = "verllv", + [LONG_INSN_VESRAV] = "vesrav", + [LONG_INSN_VESRLV] = "vesrlv", + [LONG_INSN_VSBCBI] = "vsbcbi", }; static struct s390_insn opcode[] = { @@ -1369,6 +1445,150 @@ static struct s390_insn opcode_e5[] = { { "", 0, INSTR_INVALID } }; +static struct s390_insn opcode_e7[] = { +#ifdef CONFIG_64BIT + { "lcbb", 0x27, INSTR_RXE_RRRDM }, + { "vgef", 0x13, INSTR_VRV_VVRDM }, + { "vgeg", 0x12, INSTR_VRV_VVRDM }, + { "vgbm", 0x44, INSTR_VRI_V0I0 }, + { "vgm", 0x46, INSTR_VRI_V0IIM }, + { "vl", 0x06, INSTR_VRX_VRRD0 }, + { "vlr", 0x56, INSTR_VRR_VV00000 }, + { "vlrp", 0x05, INSTR_VRX_VRRDM }, + { "vleb", 0x00, INSTR_VRX_VRRDM }, + { "vleh", 0x01, INSTR_VRX_VRRDM }, + { "vlef", 0x03, INSTR_VRX_VRRDM }, + { "vleg", 0x02, INSTR_VRX_VRRDM }, + { "vleib", 0x40, INSTR_VRI_V0IM }, + { "vleih", 0x41, INSTR_VRI_V0IM }, + { "vleif", 0x43, INSTR_VRI_V0IM }, + { "vleig", 0x42, INSTR_VRI_V0IM }, + { "vlgv", 0x21, INSTR_VRS_RVRDM }, + { "vllez", 0x04, INSTR_VRX_VRRDM }, + { "vlm", 0x36, INSTR_VRS_VVRD0 }, + { "vlbb", 0x07, INSTR_VRX_VRRDM }, + { "vlvg", 0x22, INSTR_VRS_VRRDM }, + { "vlvgp", 0x62, INSTR_VRR_VRR0000 }, + { "vll", 0x37, INSTR_VRS_VRRD0 }, + { "vmrh", 0x61, INSTR_VRR_VVV000M }, + { "vmrl", 0x60, INSTR_VRR_VVV000M }, + { "vpk", 0x94, INSTR_VRR_VVV000M }, + { "vpks", 0x97, INSTR_VRR_VVV0M0M }, + { "vpkls", 0x95, INSTR_VRR_VVV0M0M }, + { "vperm", 0x8c, INSTR_VRR_VVV000V }, + { "vpdi", 0x84, INSTR_VRR_VVV000M }, + { "vrep", 0x4d, INSTR_VRI_VVIM }, + { "vrepi", 0x45, INSTR_VRI_V0IM }, + { "vscef", 0x1b, INSTR_VRV_VWRDM }, + { "vsceg", 0x1a, INSTR_VRV_VWRDM }, + { "vsel", 0x8d, INSTR_VRR_VVV000V }, + { "vseg", 0x5f, INSTR_VRR_VV0000M }, + { "vst", 0x0e, INSTR_VRX_VRRD0 }, + { "vsteb", 0x08, INSTR_VRX_VRRDM }, + { "vsteh", 0x09, INSTR_VRX_VRRDM }, + { "vstef", 0x0b, INSTR_VRX_VRRDM }, + { "vsteg", 0x0a, INSTR_VRX_VRRDM }, + { "vstm", 0x3e, INSTR_VRS_VVRD0 }, + { "vstl", 0x3f, INSTR_VRS_VRRD0 }, + { "vuph", 0xd7, INSTR_VRR_VV0000M }, + { "vuplh", 0xd5, INSTR_VRR_VV0000M }, + { "vupl", 0xd6, INSTR_VRR_VV0000M }, + { "vupll", 0xd4, INSTR_VRR_VV0000M }, + { "va", 0xf3, INSTR_VRR_VVV000M }, + { "vacc", 0xf1, INSTR_VRR_VVV000M }, + { "vac", 0xbb, INSTR_VRR_VVVM00V }, + { "vaccc", 0xb9, INSTR_VRR_VVVM00V }, + { "vn", 0x68, INSTR_VRR_VVV0000 }, + { "vnc", 0x69, INSTR_VRR_VVV0000 }, + { "vavg", 0xf2, INSTR_VRR_VVV000M }, + { "vavgl", 0xf0, INSTR_VRR_VVV000M }, + { "vcksm", 0x66, INSTR_VRR_VVV0000 }, + { "vec", 0xdb, INSTR_VRR_VV0000M }, + { "vecl", 0xd9, INSTR_VRR_VV0000M }, + { "vceq", 0xf8, INSTR_VRR_VVV0M0M }, + { "vch", 0xfb, INSTR_VRR_VVV0M0M }, + { "vchl", 0xf9, INSTR_VRR_VVV0M0M }, + { "vclz", 0x53, INSTR_VRR_VV0000M }, + { "vctz", 0x52, INSTR_VRR_VV0000M }, + { "vx", 0x6d, INSTR_VRR_VVV0000 }, + { "vgfm", 0xb4, INSTR_VRR_VVV000M }, + { "vgfma", 0xbc, INSTR_VRR_VVVM00V }, + { "vlc", 0xde, INSTR_VRR_VV0000M }, + { "vlp", 0xdf, INSTR_VRR_VV0000M }, + { "vmx", 0xff, INSTR_VRR_VVV000M }, + { "vmxl", 0xfd, INSTR_VRR_VVV000M }, + { "vmn", 0xfe, INSTR_VRR_VVV000M }, + { "vmnl", 0xfc, INSTR_VRR_VVV000M }, + { "vmal", 0xaa, INSTR_VRR_VVVM00V }, + { "vmae", 0xae, INSTR_VRR_VVVM00V }, + { "vmale", 0xac, INSTR_VRR_VVVM00V }, + { "vmah", 0xab, INSTR_VRR_VVVM00V }, + { "vmalh", 0xa9, INSTR_VRR_VVVM00V }, + { "vmao", 0xaf, INSTR_VRR_VVVM00V }, + { "vmalo", 0xad, INSTR_VRR_VVVM00V }, + { "vmh", 0xa3, INSTR_VRR_VVV000M }, + { "vmlh", 0xa1, INSTR_VRR_VVV000M }, + { "vml", 0xa2, INSTR_VRR_VVV000M }, + { "vme", 0xa6, INSTR_VRR_VVV000M }, + { "vmle", 0xa4, INSTR_VRR_VVV000M }, + { "vmo", 0xa7, INSTR_VRR_VVV000M }, + { "vmlo", 0xa5, INSTR_VRR_VVV000M }, + { "vno", 0x6b, INSTR_VRR_VVV0000 }, + { "vo", 0x6a, INSTR_VRR_VVV0000 }, + { { 0, LONG_INSN_VPOPCT }, 0x50, INSTR_VRR_VV0000M }, + { { 0, LONG_INSN_VERLLV }, 0x73, INSTR_VRR_VVV000M }, + { "verll", 0x33, INSTR_VRS_VVRDM }, + { "verim", 0x72, INSTR_VRI_VVV0IM }, + { "veslv", 0x70, INSTR_VRR_VVV000M }, + { "vesl", 0x30, INSTR_VRS_VVRDM }, + { { 0, LONG_INSN_VESRAV }, 0x7a, INSTR_VRR_VVV000M }, + { "vesra", 0x3a, INSTR_VRS_VVRDM }, + { { 0, LONG_INSN_VESRLV }, 0x78, INSTR_VRR_VVV000M }, + { "vesrl", 0x38, INSTR_VRS_VVRDM }, + { "vsl", 0x74, INSTR_VRR_VVV0000 }, + { "vslb", 0x75, INSTR_VRR_VVV0000 }, + { "vsldb", 0x77, INSTR_VRI_VVV0I0 }, + { "vsra", 0x7e, INSTR_VRR_VVV0000 }, + { "vsrab", 0x7f, INSTR_VRR_VVV0000 }, + { "vsrl", 0x7c, INSTR_VRR_VVV0000 }, + { "vsrlb", 0x7d, INSTR_VRR_VVV0000 }, + { "vs", 0xf7, INSTR_VRR_VVV000M }, + { "vscb", 0xf5, INSTR_VRR_VVV000M }, + { "vsb", 0xbf, INSTR_VRR_VVVM00V }, + { { 0, LONG_INSN_VSBCBI }, 0xbd, INSTR_VRR_VVVM00V }, + { "vsumg", 0x65, INSTR_VRR_VVV000M }, + { "vsumq", 0x67, INSTR_VRR_VVV000M }, + { "vsum", 0x64, INSTR_VRR_VVV000M }, + { "vtm", 0xd8, INSTR_VRR_VV00000 }, + { "vfae", 0x82, INSTR_VRR_VVV0M0M }, + { "vfee", 0x80, INSTR_VRR_VVV0M0M }, + { "vfene", 0x81, INSTR_VRR_VVV0M0M }, + { "vistr", 0x5c, INSTR_VRR_VV00M0M }, + { "vstrc", 0x8a, INSTR_VRR_VVVMM0V }, + { "vfa", 0xe3, INSTR_VRR_VVV00MM }, + { "wfc", 0xcb, INSTR_VRR_VV000MM }, + { "wfk", 0xca, INSTR_VRR_VV000MM }, + { "vfce", 0xe8, INSTR_VRR_VVV0MMM }, + { "vfch", 0xeb, INSTR_VRR_VVV0MMM }, + { "vfche", 0xea, INSTR_VRR_VVV0MMM }, + { "vcdg", 0xc3, INSTR_VRR_VV00MMM }, + { "vcdlg", 0xc1, INSTR_VRR_VV00MMM }, + { "vcgd", 0xc2, INSTR_VRR_VV00MMM }, + { "vclgd", 0xc0, INSTR_VRR_VV00MMM }, + { "vfd", 0xe5, INSTR_VRR_VVV00MM }, + { "vfi", 0xc7, INSTR_VRR_VV00MMM }, + { "vlde", 0xc4, INSTR_VRR_VV000MM }, + { "vled", 0xc5, INSTR_VRR_VV00MMM }, + { "vfm", 0xe7, INSTR_VRR_VVV00MM }, + { "vfma", 0x8f, INSTR_VRR_VVVM0MV }, + { "vfms", 0x8e, INSTR_VRR_VVVM0MV }, + { "vfpso", 0xcc, INSTR_VRR_VV00MMM }, + { "vfsq", 0xce, INSTR_VRR_VV000MM }, + { "vfs", 0xe2, INSTR_VRR_VVV00MM }, + { "vftci", 0x4a, INSTR_VRI_VVIMM }, +#endif +}; + static struct s390_insn opcode_eb[] = { #ifdef CONFIG_64BIT { "lmg", 0x04, INSTR_RSY_RRRD }, @@ -1552,16 +1772,17 @@ static struct s390_insn opcode_ed[] = { static unsigned int extract_operand(unsigned char *code, const struct s390_operand *operand) { + unsigned char *cp; unsigned int val; int bits; /* Extract fragments of the operand byte for byte. */ - code += operand->shift / 8; + cp = code + operand->shift / 8; bits = (operand->shift & 7) + operand->bits; val = 0; do { val <<= 8; - val |= (unsigned int) *code++; + val |= (unsigned int) *cp++; bits -= 8; } while (bits > 0); val >>= -bits; @@ -1571,6 +1792,18 @@ static unsigned int extract_operand(unsigned char *code, if (operand->bits == 20 && operand->shift == 20) val = (val & 0xff) << 12 | (val & 0xfff00) >> 8; + /* Check for register extensions bits for vector registers. */ + if (operand->flags & OPERAND_VR) { + if (operand->shift == 8) + val |= (code[4] & 8) << 1; + else if (operand->shift == 12) + val |= (code[4] & 4) << 2; + else if (operand->shift == 16) + val |= (code[4] & 2) << 3; + else if (operand->shift == 32) + val |= (code[4] & 1) << 4; + } + /* Sign extend value if the operand is signed or pc relative. */ if ((operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) && (val & (1U << (operand->bits - 1)))) @@ -1639,6 +1872,10 @@ struct s390_insn *find_insn(unsigned char *code) case 0xe5: table = opcode_e5; break; + case 0xe7: + table = opcode_e7; + opfrag = code[5]; + break; case 0xeb: table = opcode_eb; opfrag = code[5]; @@ -1734,6 +1971,8 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr += sprintf(ptr, "%%a%i", value); else if (operand->flags & OPERAND_CR) ptr += sprintf(ptr, "%%c%i", value); + else if (operand->flags & OPERAND_VR) + ptr += sprintf(ptr, "%%v%i", value); else if (operand->flags & OPERAND_PCREL) ptr += sprintf(ptr, "%lx", (signed int) value + addr); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 0dff972a169c..cef2879edff3 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -390,10 +390,10 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_LPP; if (test_facility(50) && test_facility(73)) S390_lowcore.machine_flags |= MACHINE_FLAG_TE; - if (test_facility(66)) - S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM; if (test_facility(51)) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; + if (test_facility(129)) + S390_lowcore.machine_flags |= MACHINE_FLAG_VX; #endif } diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 1aad48398d06..0554b9771c9f 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -4,7 +4,7 @@ #include <linux/types.h> #include <linux/signal.h> #include <asm/ptrace.h> -#include <asm/cputime.h> +#include <asm/idle.h> extern void *restart_stack; extern unsigned long suspend_zero_pages; @@ -21,6 +21,8 @@ void psw_idle(struct s390_idle_data *, unsigned long); asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); +int alloc_vector_registers(struct task_struct *tsk); + void do_protection_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs); @@ -43,8 +45,10 @@ void special_op_exception(struct pt_regs *regs); void specification_exception(struct pt_regs *regs); void transaction_exception(struct pt_regs *regs); void translation_exception(struct pt_regs *regs); +void vector_exception(struct pt_regs *regs); void do_per_trap(struct pt_regs *regs); +void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); void syscall_trace(struct pt_regs *regs, int entryexit); void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index f2e674c702e1..7b2e03afd017 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -42,7 +42,8 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE -_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) +_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ + _TIF_UPROBE) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE) @@ -265,6 +266,10 @@ sysc_work: jo sysc_mcck_pending tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo sysc_reschedule +#ifdef CONFIG_UPROBES + tm __TI_flags+7(%r12),_TIF_UPROBE + jo sysc_uprobe_notify +#endif tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP jo sysc_singlestep tm __TI_flags+7(%r12),_TIF_SIGPENDING @@ -323,6 +328,16 @@ sysc_notify_resume: jg do_notify_resume # +# _TIF_UPROBE is set, call uprobe_notify_resume +# +#ifdef CONFIG_UPROBES +sysc_uprobe_notify: + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return + jg uprobe_notify_resume +#endif + +# # _PIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 54d6493c4a56..51d14fe5eb9a 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -1,7 +1,7 @@ /* * Dynamic function tracer architecture backend. * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009,2014 * * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, * Martin Schwidefsky <schwidefsky@de.ibm.com> @@ -17,100 +17,76 @@ #include <asm/asm-offsets.h> #include "entry.h" -#ifdef CONFIG_DYNAMIC_FTRACE - +void mcount_replace_code(void); void ftrace_disable_code(void); void ftrace_enable_insn(void); -#ifdef CONFIG_64BIT /* - * The 64-bit mcount code looks like this: + * The mcount code looks like this: * stg %r14,8(%r15) # offset 0 - * > larl %r1,<&counter> # offset 6 - * > brasl %r14,_mcount # offset 12 + * larl %r1,<&counter> # offset 6 + * brasl %r14,_mcount # offset 12 * lg %r14,8(%r15) # offset 18 - * Total length is 24 bytes. The middle two instructions of the mcount - * block get overwritten by ftrace_make_nop / ftrace_make_call. - * The 64-bit enabled ftrace code block looks like this: - * stg %r14,8(%r15) # offset 0 + * Total length is 24 bytes. The complete mcount block initially gets replaced + * by ftrace_make_nop. Subsequent calls to ftrace_make_call / ftrace_make_nop + * only patch the jg/lg instruction within the block. + * Note: we do not patch the first instruction to an unconditional branch, + * since that would break kprobes/jprobes. It is easier to leave the larl + * instruction in and only modify the second instruction. + * The enabled ftrace code block looks like this: + * larl %r0,.+24 # offset 0 * > lg %r1,__LC_FTRACE_FUNC # offset 6 - * > lgr %r0,%r0 # offset 12 - * > basr %r14,%r1 # offset 16 - * lg %r14,8(%15) # offset 18 - * The return points of the mcount/ftrace function have the same offset 18. - * The 64-bit disable ftrace code block looks like this: - * stg %r14,8(%r15) # offset 0 + * br %r1 # offset 12 + * brcl 0,0 # offset 14 + * brc 0,0 # offset 20 + * The ftrace function gets called with a non-standard C function call ABI + * where r0 contains the return address. It is also expected that the called + * function only clobbers r0 and r1, but restores r2-r15. + * The return point of the ftrace function has offset 24, so execution + * continues behind the mcount block. + * larl %r0,.+24 # offset 0 * > jg .+18 # offset 6 - * > lgr %r0,%r0 # offset 12 - * > basr %r14,%r1 # offset 16 - * lg %r14,8(%15) # offset 18 + * br %r1 # offset 12 + * brcl 0,0 # offset 14 + * brc 0,0 # offset 20 * The jg instruction branches to offset 24 to skip as many instructions * as possible. */ asm( " .align 4\n" + "mcount_replace_code:\n" + " larl %r0,0f\n" "ftrace_disable_code:\n" " jg 0f\n" - " lgr %r0,%r0\n" - " basr %r14,%r1\n" + " br %r1\n" + " brcl 0,0\n" + " brc 0,0\n" "0:\n" " .align 4\n" "ftrace_enable_insn:\n" " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n"); +#define MCOUNT_BLOCK_SIZE 24 +#define MCOUNT_INSN_OFFSET 6 #define FTRACE_INSN_SIZE 6 -#else /* CONFIG_64BIT */ -/* - * The 31-bit mcount code looks like this: - * st %r14,4(%r15) # offset 0 - * > bras %r1,0f # offset 4 - * > .long _mcount # offset 8 - * > .long <&counter> # offset 12 - * > 0: l %r14,0(%r1) # offset 16 - * > l %r1,4(%r1) # offset 20 - * basr %r14,%r14 # offset 24 - * l %r14,4(%r15) # offset 26 - * Total length is 30 bytes. The twenty bytes starting from offset 4 - * to offset 24 get overwritten by ftrace_make_nop / ftrace_make_call. - * The 31-bit enabled ftrace code block looks like this: - * st %r14,4(%r15) # offset 0 - * > l %r14,__LC_FTRACE_FUNC # offset 4 - * > j 0f # offset 8 - * > .fill 12,1,0x07 # offset 12 - * 0: basr %r14,%r14 # offset 24 - * l %r14,4(%r14) # offset 26 - * The return points of the mcount/ftrace function have the same offset 26. - * The 31-bit disabled ftrace code block looks like this: - * st %r14,4(%r15) # offset 0 - * > j .+26 # offset 4 - * > j 0f # offset 8 - * > .fill 12,1,0x07 # offset 12 - * 0: basr %r14,%r14 # offset 24 - * l %r14,4(%r14) # offset 26 - * The j instruction branches to offset 30 to skip as many instructions - * as possible. - */ -asm( - " .align 4\n" - "ftrace_disable_code:\n" - " j 1f\n" - " j 0f\n" - " .fill 12,1,0x07\n" - "0: basr %r14,%r14\n" - "1:\n" - " .align 4\n" - "ftrace_enable_insn:\n" - " l %r14,"__stringify(__LC_FTRACE_FUNC)"\n"); - -#define FTRACE_INSN_SIZE 4 - -#endif /* CONFIG_64BIT */ - +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return 0; +} int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { + /* Initial replacement of the whole mcount block */ + if (addr == MCOUNT_ADDR) { + if (probe_kernel_write((void *) rec->ip - MCOUNT_INSN_OFFSET, + mcount_replace_code, + MCOUNT_BLOCK_SIZE)) + return -EPERM; + return 0; + } if (probe_kernel_write((void *) rec->ip, ftrace_disable_code, MCOUNT_INSN_SIZE)) return -EPERM; @@ -135,8 +111,6 @@ int __init ftrace_dyn_arch_init(void) return 0; } -#endif /* CONFIG_DYNAMIC_FTRACE */ - #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * Hook the return address and push it in the stack of return addresses @@ -162,31 +136,26 @@ out: return parent; } -#ifdef CONFIG_DYNAMIC_FTRACE /* * Patch the kernel code at ftrace_graph_caller location. The instruction - * there is branch relative and save to prepare_ftrace_return. To disable - * the call to prepare_ftrace_return we patch the bras offset to point - * directly after the instructions. To enable the call we calculate - * the original offset to prepare_ftrace_return and put it back. + * there is branch relative on condition. To enable the ftrace graph code + * block, we simply patch the mask field of the instruction to zero and + * turn the instruction into a nop. + * To disable the ftrace graph code the mask field will be patched to + * all ones, which turns the instruction into an unconditional branch. */ int ftrace_enable_ftrace_graph_caller(void) { - unsigned short offset; + u8 op = 0x04; /* set mask field to zero */ - offset = ((void *) prepare_ftrace_return - - (void *) ftrace_graph_caller) / 2; - return probe_kernel_write((void *) ftrace_graph_caller + 2, - &offset, sizeof(offset)); + return probe_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op)); } int ftrace_disable_ftrace_graph_caller(void) { - static unsigned short offset = 0x0002; + u8 op = 0xf4; /* set mask field to all ones */ - return probe_kernel_write((void *) ftrace_graph_caller + 2, - &offset, sizeof(offset)); + return probe_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op)); } -#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index e88d35d74950..d62eee11f0b5 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -398,7 +398,7 @@ ENTRY(startup_kdump) xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST #ifndef CONFIG_MARCH_G5 # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} - .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list + .insn s,0xb2b10000,0 # store facilities @ __LC_STFL_FAC_LIST tm __LC_STFL_FAC_LIST,0x01 # stfle available ? jz 0f la %r0,1 diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c new file mode 100644 index 000000000000..c846aee7372f --- /dev/null +++ b/arch/s390/kernel/idle.c @@ -0,0 +1,124 @@ +/* + * Idle functions for s390. + * + * Copyright IBM Corp. 2014 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/kprobes.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <asm/cputime.h> +#include <asm/nmi.h> +#include <asm/smp.h> +#include "entry.h" + +static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); + +void __kprobes enabled_wait(void) +{ + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + unsigned long long idle_time; + unsigned long psw_mask; + + trace_hardirqs_on(); + + /* Wait for external, I/O or machine check interrupt. */ + psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + clear_cpu_flag(CIF_NOHZ_DELAY); + + /* Call the assembler magic in entry.S */ + psw_idle(idle, psw_mask); + + /* Account time spent with enabled wait psw loaded as idle time. */ + idle->sequence++; + smp_wmb(); + idle_time = idle->clock_idle_exit - idle->clock_idle_enter; + idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; + idle->idle_time += idle_time; + idle->idle_count++; + account_idle_time(idle_time); + smp_wmb(); + idle->sequence++; +} + +static ssize_t show_idle_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long idle_count; + unsigned int sequence; + + do { + sequence = ACCESS_ONCE(idle->sequence); + idle_count = ACCESS_ONCE(idle->idle_count); + if (ACCESS_ONCE(idle->clock_idle_enter)) + idle_count++; + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + return sprintf(buf, "%llu\n", idle_count); +} +DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); + +static ssize_t show_idle_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long now, idle_time, idle_enter, idle_exit; + unsigned int sequence; + + do { + now = get_tod_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_time = ACCESS_ONCE(idle->idle_time); + idle_enter = ACCESS_ONCE(idle->clock_idle_enter); + idle_exit = ACCESS_ONCE(idle->clock_idle_exit); + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; + return sprintf(buf, "%llu\n", idle_time >> 12); +} +DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); + +cputime64_t arch_cpu_idle_time(int cpu) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); + unsigned long long now, idle_enter, idle_exit; + unsigned int sequence; + + do { + now = get_tod_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_enter = ACCESS_ONCE(idle->clock_idle_enter); + idle_exit = ACCESS_ONCE(idle->clock_idle_exit); + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0; +} + +void arch_cpu_idle_enter(void) +{ + local_mcck_disable(); +} + +void arch_cpu_idle(void) +{ + if (!test_cpu_flag(CIF_MCCK_PENDING)) + /* Halt the cpu and keep track of cpu time accounting. */ + enabled_wait(); + local_irq_enable(); +} + +void arch_cpu_idle_exit(void) +{ + local_mcck_enable(); + if (test_cpu_flag(CIF_MCCK_PENDING)) + s390_handle_mcck(); +} + +void arch_cpu_idle_dead(void) +{ + cpu_die(); +} diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 8eb82443cfbd..1b8a38ab7861 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -70,6 +70,7 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, {.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, + {.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"}, {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"}, @@ -258,7 +259,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy) ext_code = *(struct ext_code *) ®s->int_code; if (ext_code.code != EXT_IRQ_CLK_COMP) - __get_cpu_var(s390_idle).nohz_delay = 1; + set_cpu_flag(CIF_NOHZ_DELAY); index = ext_hash(ext_code.code); rcu_read_lock(); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index bc71a7b95af5..27ae5433fe4d 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -58,161 +58,13 @@ struct kprobe_insn_cache kprobe_dmainsn_slots = { .insn_size = MAX_INSN_SIZE, }; -static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) -{ - if (!is_known_insn((unsigned char *)insn)) - return -EINVAL; - switch (insn[0] >> 8) { - case 0x0c: /* bassm */ - case 0x0b: /* bsm */ - case 0x83: /* diag */ - case 0x44: /* ex */ - case 0xac: /* stnsm */ - case 0xad: /* stosm */ - return -EINVAL; - case 0xc6: - switch (insn[0] & 0x0f) { - case 0x00: /* exrl */ - return -EINVAL; - } - } - switch (insn[0]) { - case 0x0101: /* pr */ - case 0xb25a: /* bsa */ - case 0xb240: /* bakr */ - case 0xb258: /* bsg */ - case 0xb218: /* pc */ - case 0xb228: /* pt */ - case 0xb98d: /* epsw */ - return -EINVAL; - } - return 0; -} - -static int __kprobes get_fixup_type(kprobe_opcode_t *insn) -{ - /* default fixup method */ - int fixup = FIXUP_PSW_NORMAL; - - switch (insn[0] >> 8) { - case 0x05: /* balr */ - case 0x0d: /* basr */ - fixup = FIXUP_RETURN_REGISTER; - /* if r2 = 0, no branch will be taken */ - if ((insn[0] & 0x0f) == 0) - fixup |= FIXUP_BRANCH_NOT_TAKEN; - break; - case 0x06: /* bctr */ - case 0x07: /* bcr */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - case 0x45: /* bal */ - case 0x4d: /* bas */ - fixup = FIXUP_RETURN_REGISTER; - break; - case 0x47: /* bc */ - case 0x46: /* bct */ - case 0x86: /* bxh */ - case 0x87: /* bxle */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - case 0x82: /* lpsw */ - fixup = FIXUP_NOT_REQUIRED; - break; - case 0xb2: /* lpswe */ - if ((insn[0] & 0xff) == 0xb2) - fixup = FIXUP_NOT_REQUIRED; - break; - case 0xa7: /* bras */ - if ((insn[0] & 0x0f) == 0x05) - fixup |= FIXUP_RETURN_REGISTER; - break; - case 0xc0: - if ((insn[0] & 0x0f) == 0x05) /* brasl */ - fixup |= FIXUP_RETURN_REGISTER; - break; - case 0xeb: - switch (insn[2] & 0xff) { - case 0x44: /* bxhg */ - case 0x45: /* bxleg */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - } - break; - case 0xe3: /* bctg */ - if ((insn[2] & 0xff) == 0x46) - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - case 0xec: - switch (insn[2] & 0xff) { - case 0xe5: /* clgrb */ - case 0xe6: /* cgrb */ - case 0xf6: /* crb */ - case 0xf7: /* clrb */ - case 0xfc: /* cgib */ - case 0xfd: /* cglib */ - case 0xfe: /* cib */ - case 0xff: /* clib */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - } - break; - } - return fixup; -} - -static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn) -{ - /* Check if we have a RIL-b or RIL-c format instruction which - * we need to modify in order to avoid instruction emulation. */ - switch (insn[0] >> 8) { - case 0xc0: - if ((insn[0] & 0x0f) == 0x00) /* larl */ - return true; - break; - case 0xc4: - switch (insn[0] & 0x0f) { - case 0x02: /* llhrl */ - case 0x04: /* lghrl */ - case 0x05: /* lhrl */ - case 0x06: /* llghrl */ - case 0x07: /* sthrl */ - case 0x08: /* lgrl */ - case 0x0b: /* stgrl */ - case 0x0c: /* lgfrl */ - case 0x0d: /* lrl */ - case 0x0e: /* llgfrl */ - case 0x0f: /* strl */ - return true; - } - break; - case 0xc6: - switch (insn[0] & 0x0f) { - case 0x02: /* pfdrl */ - case 0x04: /* cghrl */ - case 0x05: /* chrl */ - case 0x06: /* clghrl */ - case 0x07: /* clhrl */ - case 0x08: /* cgrl */ - case 0x0a: /* clgrl */ - case 0x0c: /* cgfrl */ - case 0x0d: /* crl */ - case 0x0e: /* clgfrl */ - case 0x0f: /* clrl */ - return true; - } - break; - } - return false; -} - static void __kprobes copy_instruction(struct kprobe *p) { s64 disp, new_disp; u64 addr, new_addr; memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8)); - if (!is_insn_relative_long(p->ainsn.insn)) + if (!probe_is_insn_relative_long(p->ainsn.insn)) return; /* * For pc-relative instructions in RIL-b or RIL-c format patch the @@ -276,7 +128,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if ((unsigned long) p->addr & 0x01) return -EINVAL; /* Make sure the probe isn't going on a difficult instruction */ - if (is_prohibited_opcode(p->addr)) + if (probe_is_prohibited_opcode(p->addr)) return -EINVAL; if (s390_get_insn_slot(p)) return -ENOMEM; @@ -605,7 +457,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long ip = regs->psw.addr & PSW_ADDR_INSN; - int fixup = get_fixup_type(p->ainsn.insn); + int fixup = probe_get_fixup_type(p->ainsn.insn); if (fixup & FIXUP_PSW_NORMAL) ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; @@ -789,11 +641,6 @@ void __kprobes jprobe_return(void) asm volatile(".word 0x0002"); } -static void __used __kprobes jprobe_return_end(void) -{ - asm volatile("bcr 0,0"); -} - int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 719e27b2cf22..4685337fa7c6 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -25,6 +25,7 @@ #include <asm/elf.h> #include <asm/asm-offsets.h> #include <asm/os_info.h> +#include <asm/switch_to.h> typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); @@ -43,7 +44,7 @@ static void add_elf_notes(int cpu) memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); - ptr = fill_cpu_elf_notes(ptr, sa); + ptr = fill_cpu_elf_notes(ptr, sa, NULL); memset(ptr, 0, sizeof(struct elf_note)); } @@ -53,8 +54,11 @@ static void add_elf_notes(int cpu) static void setup_regs(void) { unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; + struct _lowcore *lc; int cpu, this_cpu; + /* Get lowcore pointer from store status of this CPU (absolute zero) */ + lc = (struct _lowcore *)(unsigned long)S390_lowcore.prefixreg_save_area; this_cpu = smp_find_processor_id(stap()); add_elf_notes(this_cpu); for_each_online_cpu(cpu) { @@ -64,6 +68,8 @@ static void setup_regs(void) continue; add_elf_notes(cpu); } + if (MACHINE_HAS_VX) + save_vx_regs_safe((void *) lc->vector_save_area_addr); /* Copy dump CPU store status info to absolute zero */ memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); } diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 433c6dbfa442..4300ea374826 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -8,62 +8,72 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/ftrace.h> +#include <asm/ptrace.h> .section .kprobes.text, "ax" ENTRY(ftrace_stub) br %r14 +#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) +#define STACK_PTREGS (STACK_FRAME_OVERHEAD) +#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) +#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) + ENTRY(_mcount) -#ifdef CONFIG_DYNAMIC_FTRACE br %r14 ENTRY(ftrace_caller) + .globl ftrace_regs_caller + .set ftrace_regs_caller,ftrace_caller + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) + stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + stg %r0,(STACK_PTREGS_PSW+8)(%r15) + stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + aghik %r2,%r0,-MCOUNT_INSN_SIZE + lgrl %r4,function_trace_op + lgrl %r1,ftrace_trace_function +#else + lgr %r2,%r0 + aghi %r2,-MCOUNT_INSN_SIZE + larl %r4,function_trace_op + lg %r4,0(%r4) + larl %r1,ftrace_trace_function + lg %r1,0(%r1) #endif - stm %r2,%r5,16(%r15) - bras %r1,1f -0: .long ftrace_trace_function -1: st %r14,56(%r15) - lr %r0,%r15 - ahi %r15,-96 - l %r3,100(%r15) - la %r2,0(%r14) - st %r0,__SF_BACKCHAIN(%r15) - la %r3,0(%r3) - ahi %r2,-MCOUNT_INSN_SIZE - l %r14,0b-0b(%r1) - l %r14,0(%r14) - basr %r14,%r14 + lgr %r3,%r14 + la %r5,STACK_PTREGS(%r15) + basr %r14,%r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER - l %r2,100(%r15) - l %r3,152(%r15) +# The j instruction gets runtime patched to a nop instruction. +# See ftrace_enable_ftrace_graph_caller. ENTRY(ftrace_graph_caller) -# The bras instruction gets runtime patched to call prepare_ftrace_return. -# See ftrace_enable_ftrace_graph_caller. The patched instruction is: -# bras %r14,prepare_ftrace_return - bras %r14,0f -0: st %r2,100(%r15) + j ftrace_graph_caller_end + lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) + lg %r3,(STACK_PTREGS_PSW+8)(%r15) + brasl %r14,prepare_ftrace_return + stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) +ftrace_graph_caller_end: + .globl ftrace_graph_caller_end #endif - ahi %r15,96 - l %r14,56(%r15) - lm %r2,%r5,16(%r15) - br %r14 + lg %r1,(STACK_PTREGS_PSW+8)(%r15) + lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) + br %r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(return_to_handler) - stm %r2,%r5,16(%r15) - st %r14,56(%r15) - lr %r0,%r15 - ahi %r15,-96 - st %r0,__SF_BACKCHAIN(%r15) - bras %r1,0f - .long ftrace_return_to_handler -0: l %r2,0b-0b(%r1) - basr %r14,%r2 - lr %r14,%r2 - ahi %r15,96 - lm %r2,%r5,16(%r15) + stmg %r2,%r5,32(%r15) + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r1,__SF_BACKCHAIN(%r15) + brasl %r14,ftrace_return_to_handler + aghi %r15,STACK_FRAME_OVERHEAD + lgr %r14,%r2 + lmg %r2,%r5,32(%r15) br %r14 #endif diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S deleted file mode 100644 index c67a8bf0fd9a..000000000000 --- a/arch/s390/kernel/mcount64.S +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright IBM Corp. 2008, 2009 - * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, - * - */ - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/ftrace.h> - - .section .kprobes.text, "ax" - -ENTRY(ftrace_stub) - br %r14 - -ENTRY(_mcount) -#ifdef CONFIG_DYNAMIC_FTRACE - br %r14 - -ENTRY(ftrace_caller) -#endif - stmg %r2,%r5,32(%r15) - stg %r14,112(%r15) - lgr %r1,%r15 - aghi %r15,-160 - stg %r1,__SF_BACKCHAIN(%r15) - lgr %r2,%r14 - lg %r3,168(%r15) - aghi %r2,-MCOUNT_INSN_SIZE - larl %r14,ftrace_trace_function - lg %r14,0(%r14) - basr %r14,%r14 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - lg %r2,168(%r15) - lg %r3,272(%r15) -ENTRY(ftrace_graph_caller) -# The bras instruction gets runtime patched to call prepare_ftrace_return. -# See ftrace_enable_ftrace_graph_caller. The patched instruction is: -# bras %r14,prepare_ftrace_return - bras %r14,0f -0: stg %r2,168(%r15) -#endif - aghi %r15,160 - lmg %r2,%r5,32(%r15) - lg %r14,112(%r15) - br %r14 - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -ENTRY(return_to_handler) - stmg %r2,%r5,32(%r15) - lgr %r1,%r15 - aghi %r15,-160 - stg %r1,__SF_BACKCHAIN(%r15) - brasl %r14,ftrace_return_to_handler - aghi %r15,160 - lgr %r14,%r2 - lmg %r2,%r5,32(%r15) - br %r14 - -#endif diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 210e1285f75a..db96b418160a 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -20,6 +20,7 @@ #include <asm/cputime.h> #include <asm/nmi.h> #include <asm/crw.h> +#include <asm/switch_to.h> struct mcck_struct { int kill_task; @@ -163,6 +164,21 @@ static int notrace s390_revalidate_registers(struct mci *mci) " ld 15,120(%0)\n" : : "a" (fpt_save_area)); } + +#ifdef CONFIG_64BIT + /* Revalidate vector registers */ + if (MACHINE_HAS_VX && current->thread.vxrs) { + if (!mci->vr) { + /* + * Vector registers can't be restored and therefore + * the process needs to be terminated. + */ + kill_task = 1; + } + restore_vx_regs((__vector128 *) + S390_lowcore.vector_save_area_addr); + } +#endif /* Revalidate access registers */ asm volatile( " lam 0,15,0(%0)" diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S index 813ec7260878..f6f8886399f6 100644 --- a/arch/s390/kernel/pgm_check.S +++ b/arch/s390/kernel/pgm_check.S @@ -49,7 +49,7 @@ PGM_CHECK_DEFAULT /* 17 */ PGM_CHECK_64BIT(transaction_exception) /* 18 */ PGM_CHECK_DEFAULT /* 19 */ PGM_CHECK_DEFAULT /* 1a */ -PGM_CHECK_DEFAULT /* 1b */ +PGM_CHECK_64BIT(vector_exception) /* 1b */ PGM_CHECK(space_switch_exception) /* 1c */ PGM_CHECK(hfp_sqrt_exception) /* 1d */ PGM_CHECK_DEFAULT /* 1e */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 93b9ca42e5c0..ed84cc224899 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -61,30 +61,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sf->gprs[8]; } -void arch_cpu_idle(void) -{ - local_mcck_disable(); - if (test_cpu_flag(CIF_MCCK_PENDING)) { - local_mcck_enable(); - local_irq_enable(); - return; - } - /* Halt the cpu and keep track of cpu time accounting. */ - vtime_stop_cpu(); - local_irq_enable(); -} - -void arch_cpu_idle_exit(void) -{ - if (test_cpu_flag(CIF_MCCK_PENDING)) - s390_handle_mcck(); -} - -void arch_cpu_idle_dead(void) -{ - cpu_die(); -} - extern void __kprobes kernel_thread_starter(void); /* diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 24612029f450..edefead3b43a 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -23,7 +23,6 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id); */ void cpu_init(void) { - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); struct cpuid *id = &__get_cpu_var(cpu_id); get_cpu_id(id); @@ -31,7 +30,6 @@ void cpu_init(void) current->active_mm = &init_mm; BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); - memset(idle, 0, sizeof(*idle)); } /* @@ -41,7 +39,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs", "te" + "edat", "etf3eh", "highgprs", "te", "vx" }; unsigned long n = (unsigned long) v - 1; int i; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 5dc7ad9e2fbf..f537e937a988 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -38,15 +38,6 @@ #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> -enum s390_regset { - REGSET_GENERAL, - REGSET_FP, - REGSET_LAST_BREAK, - REGSET_TDB, - REGSET_SYSTEM_CALL, - REGSET_GENERAL_EXTENDED, -}; - void update_cr_regs(struct task_struct *task) { struct pt_regs *regs = task_pt_regs(task); @@ -55,27 +46,39 @@ void update_cr_regs(struct task_struct *task) #ifdef CONFIG_64BIT /* Take care of the enable/disable of transactional execution. */ - if (MACHINE_HAS_TE) { + if (MACHINE_HAS_TE || MACHINE_HAS_VX) { unsigned long cr, cr_new; __ctl_store(cr, 0, 0); - /* Set or clear transaction execution TXC bit 8. */ - cr_new = cr | (1UL << 55); - if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new &= ~(1UL << 55); + cr_new = cr; + if (MACHINE_HAS_TE) { + /* Set or clear transaction execution TXC bit 8. */ + cr_new |= (1UL << 55); + if (task->thread.per_flags & PER_FLAG_NO_TE) + cr_new &= ~(1UL << 55); + } + if (MACHINE_HAS_VX) { + /* Enable/disable of vector extension */ + cr_new &= ~(1UL << 17); + if (task->thread.vxrs) + cr_new |= (1UL << 17); + } if (cr_new != cr) __ctl_load(cr_new, 0, 0); - /* Set or clear transaction execution TDC bits 62 and 63. */ - __ctl_store(cr, 2, 2); - cr_new = cr & ~3UL; - if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { - if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) - cr_new |= 1UL; - else - cr_new |= 2UL; + if (MACHINE_HAS_TE) { + /* Set/clear transaction execution TDC bits 62/63. */ + __ctl_store(cr, 2, 2); + cr_new = cr & ~3UL; + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { + if (task->thread.per_flags & + PER_FLAG_TE_ABORT_RAND_TEND) + cr_new |= 1UL; + else + cr_new |= 2UL; + } + if (cr_new != cr) + __ctl_load(cr_new, 2, 2); } - if (cr_new != cr) - __ctl_load(cr_new, 2, 2); } #endif /* Copy user specified PER registers */ @@ -84,7 +87,8 @@ void update_cr_regs(struct task_struct *task) new.end = thread->per_user.end; /* merge TIF_SINGLE_STEP into user specified PER registers. */ - if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) || + test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) { if (test_tsk_thread_flag(task, TIF_BLOCK_STEP)) new.control |= PER_EVENT_BRANCH; else @@ -93,6 +97,8 @@ void update_cr_regs(struct task_struct *task) new.control |= PER_CONTROL_SUSPENSION; new.control |= PER_EVENT_TRANSACTION_END; #endif + if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) + new.control |= PER_EVENT_IFETCH; new.start = 0; new.end = PSW_ADDR_INSN; } @@ -803,7 +809,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) long ret = 0; /* Do the secure computing check first. */ - if (secure_computing(regs->gprs[2])) { + if (secure_computing()) { /* seccomp failures shouldn't expose any additional code. */ ret = -1; goto out; @@ -923,7 +929,15 @@ static int s390_fpregs_get(struct task_struct *target, save_fp_ctl(&target->thread.fp_regs.fpc); save_fp_regs(target->thread.fp_regs.fprs); } +#ifdef CONFIG_64BIT + else if (target->thread.vxrs) { + int i; + for (i = 0; i < __NUM_VXRS_LOW; i++) + target->thread.fp_regs.fprs[i] = + *(freg_t *)(target->thread.vxrs + i); + } +#endif return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fp_regs, 0, -1); } @@ -957,9 +971,20 @@ static int s390_fpregs_set(struct task_struct *target, target->thread.fp_regs.fprs, offsetof(s390_fp_regs, fprs), -1); - if (rc == 0 && target == current) { - restore_fp_ctl(&target->thread.fp_regs.fpc); - restore_fp_regs(target->thread.fp_regs.fprs); + if (rc == 0) { + if (target == current) { + restore_fp_ctl(&target->thread.fp_regs.fpc); + restore_fp_regs(target->thread.fp_regs.fprs); + } +#ifdef CONFIG_64BIT + else if (target->thread.vxrs) { + int i; + + for (i = 0; i < __NUM_VXRS_LOW; i++) + *(freg_t *)(target->thread.vxrs + i) = + target->thread.fp_regs.fprs[i]; + } +#endif } return rc; @@ -1015,6 +1040,95 @@ static int s390_tdb_set(struct task_struct *target, return 0; } +static int s390_vxrs_active(struct task_struct *target, + const struct user_regset *regset) +{ + return !!target->thread.vxrs; +} + +static int s390_vxrs_low_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + if (target->thread.vxrs) { + if (target == current) + save_vx_regs(target->thread.vxrs); + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1); + } else + memset(vxrs, 0, sizeof(vxrs)); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); +} + +static int s390_vxrs_low_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i, rc; + + if (!target->thread.vxrs) { + rc = alloc_vector_registers(target); + if (rc) + return rc; + } else if (target == current) + save_vx_regs(target->thread.vxrs); + + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); + if (rc == 0) { + for (i = 0; i < __NUM_VXRS_LOW; i++) + *((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i]; + if (target == current) + restore_vx_regs(target->thread.vxrs); + } + + return rc; +} + +static int s390_vxrs_high_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + __vector128 vxrs[__NUM_VXRS_HIGH]; + + if (target->thread.vxrs) { + if (target == current) + save_vx_regs(target->thread.vxrs); + memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW, + sizeof(vxrs)); + } else + memset(vxrs, 0, sizeof(vxrs)); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); +} + +static int s390_vxrs_high_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int rc; + + if (!target->thread.vxrs) { + rc = alloc_vector_registers(target); + if (rc) + return rc; + } else if (target == current) + save_vx_regs(target->thread.vxrs); + + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.vxrs + __NUM_VXRS_LOW, 0, -1); + if (rc == 0 && target == current) + restore_vx_regs(target->thread.vxrs); + + return rc; +} + #endif static int s390_system_call_get(struct task_struct *target, @@ -1038,7 +1152,7 @@ static int s390_system_call_set(struct task_struct *target, } static const struct user_regset s390_regsets[] = { - [REGSET_GENERAL] = { + { .core_note_type = NT_PRSTATUS, .n = sizeof(s390_regs) / sizeof(long), .size = sizeof(long), @@ -1046,7 +1160,7 @@ static const struct user_regset s390_regsets[] = { .get = s390_regs_get, .set = s390_regs_set, }, - [REGSET_FP] = { + { .core_note_type = NT_PRFPREG, .n = sizeof(s390_fp_regs) / sizeof(long), .size = sizeof(long), @@ -1054,8 +1168,16 @@ static const struct user_regset s390_regsets[] = { .get = s390_fpregs_get, .set = s390_fpregs_set, }, + { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, #ifdef CONFIG_64BIT - [REGSET_LAST_BREAK] = { + { .core_note_type = NT_S390_LAST_BREAK, .n = 1, .size = sizeof(long), @@ -1063,7 +1185,7 @@ static const struct user_regset s390_regsets[] = { .get = s390_last_break_get, .set = s390_last_break_set, }, - [REGSET_TDB] = { + { .core_note_type = NT_S390_TDB, .n = 1, .size = 256, @@ -1071,15 +1193,25 @@ static const struct user_regset s390_regsets[] = { .get = s390_tdb_get, .set = s390_tdb_set, }, -#endif - [REGSET_SYSTEM_CALL] = { - .core_note_type = NT_S390_SYSTEM_CALL, - .n = 1, - .size = sizeof(unsigned int), - .align = sizeof(unsigned int), - .get = s390_system_call_get, - .set = s390_system_call_set, + { + .core_note_type = NT_S390_VXRS_LOW, + .n = __NUM_VXRS_LOW, + .size = sizeof(__u64), + .align = sizeof(__u64), + .active = s390_vxrs_active, + .get = s390_vxrs_low_get, + .set = s390_vxrs_low_set, }, + { + .core_note_type = NT_S390_VXRS_HIGH, + .n = __NUM_VXRS_HIGH, + .size = sizeof(__vector128), + .align = sizeof(__vector128), + .active = s390_vxrs_active, + .get = s390_vxrs_high_get, + .set = s390_vxrs_high_set, + }, +#endif }; static const struct user_regset_view user_s390_view = { @@ -1244,7 +1376,7 @@ static int s390_compat_last_break_set(struct task_struct *target, } static const struct user_regset s390_compat_regsets[] = { - [REGSET_GENERAL] = { + { .core_note_type = NT_PRSTATUS, .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), @@ -1252,7 +1384,7 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_regs_get, .set = s390_compat_regs_set, }, - [REGSET_FP] = { + { .core_note_type = NT_PRFPREG, .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), @@ -1260,7 +1392,15 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_fpregs_get, .set = s390_fpregs_set, }, - [REGSET_LAST_BREAK] = { + { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, + { .core_note_type = NT_S390_LAST_BREAK, .n = 1, .size = sizeof(long), @@ -1268,7 +1408,7 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_last_break_get, .set = s390_compat_last_break_set, }, - [REGSET_TDB] = { + { .core_note_type = NT_S390_TDB, .n = 1, .size = 256, @@ -1276,15 +1416,25 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_tdb_get, .set = s390_tdb_set, }, - [REGSET_SYSTEM_CALL] = { - .core_note_type = NT_S390_SYSTEM_CALL, - .n = 1, - .size = sizeof(compat_uint_t), - .align = sizeof(compat_uint_t), - .get = s390_system_call_get, - .set = s390_system_call_set, + { + .core_note_type = NT_S390_VXRS_LOW, + .n = __NUM_VXRS_LOW, + .size = sizeof(__u64), + .align = sizeof(__u64), + .active = s390_vxrs_active, + .get = s390_vxrs_low_get, + .set = s390_vxrs_low_set, + }, + { + .core_note_type = NT_S390_VXRS_HIGH, + .n = __NUM_VXRS_HIGH, + .size = sizeof(__vector128), + .align = sizeof(__vector128), + .active = s390_vxrs_active, + .get = s390_vxrs_high_get, + .set = s390_vxrs_high_set, }, - [REGSET_GENERAL_EXTENDED] = { + { .core_note_type = NT_S390_HIGH_GPRS, .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), .size = sizeof(compat_long_t), diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 82bc113e8c1d..e80d9ff9a56d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -343,6 +343,9 @@ static void __init setup_lowcore(void) __ctl_set_bit(14, 29); } #else + if (MACHINE_HAS_VX) + lc->vector_save_area_addr = + (unsigned long) &lc->vector_save_area; lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif lc->sync_enter_timer = S390_lowcore.sync_enter_timer; @@ -452,8 +455,8 @@ static void __init setup_memory_end(void) #ifdef CONFIG_64BIT vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; - tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size; - if (tmp <= (1UL << 42)) + tmp = tmp * (sizeof(struct page) + PAGE_SIZE); + if (tmp + vmalloc_size + MODULES_LEN <= (1UL << 42)) vmax = 1UL << 42; /* 3-level kernel page table */ else vmax = 1UL << 53; /* 4-level kernel page table */ @@ -765,6 +768,12 @@ static void __init setup_hwcaps(void) */ if (test_facility(50) && test_facility(73)) elf_hwcap |= HWCAP_S390_TE; + + /* + * Vector extension HWCAP_S390_VXRS is bit 11. + */ + if (test_facility(129)) + elf_hwcap |= HWCAP_S390_VXRS; #endif get_cpu_id(&cpu_id); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 469c4c6d9182..0c1a0ff0a558 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -31,30 +31,117 @@ #include <asm/switch_to.h> #include "entry.h" -typedef struct +/* + * Layout of an old-style signal-frame: + * ----------------------------------------- + * | save area (_SIGNAL_FRAMESIZE) | + * ----------------------------------------- + * | struct sigcontext | + * | oldmask | + * | _sigregs * | + * ----------------------------------------- + * | _sigregs with | + * | _s390_regs_common | + * | _s390_fp_regs | + * ----------------------------------------- + * | int signo | + * ----------------------------------------- + * | _sigregs_ext with | + * | gprs_high 64 byte (opt) | + * | vxrs_low 128 byte (opt) | + * | vxrs_high 256 byte (opt) | + * | reserved 128 byte (opt) | + * ----------------------------------------- + * | __u16 svc_insn | + * ----------------------------------------- + * The svc_insn entry with the sigreturn system call opcode does not + * have a fixed position and moves if gprs_high or vxrs exist. + * Future extensions will be added to _sigregs_ext. + */ +struct sigframe { __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; struct sigcontext sc; _sigregs sregs; int signo; - __u8 retcode[S390_SYSCALL_SIZE]; -} sigframe; + _sigregs_ext sregs_ext; + __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */ +}; -typedef struct +/* + * Layout of an rt signal-frame: + * ----------------------------------------- + * | save area (_SIGNAL_FRAMESIZE) | + * ----------------------------------------- + * | svc __NR_rt_sigreturn 2 byte | + * ----------------------------------------- + * | struct siginfo | + * ----------------------------------------- + * | struct ucontext_extended with | + * | unsigned long uc_flags | + * | struct ucontext *uc_link | + * | stack_t uc_stack | + * | _sigregs uc_mcontext with | + * | _s390_regs_common | + * | _s390_fp_regs | + * | sigset_t uc_sigmask | + * | _sigregs_ext uc_mcontext_ext | + * | gprs_high 64 byte (opt) | + * | vxrs_low 128 byte (opt) | + * | vxrs_high 256 byte (opt)| + * | reserved 128 byte (opt) | + * ----------------------------------------- + * Future extensions will be added to _sigregs_ext. + */ +struct rt_sigframe { __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; - __u8 retcode[S390_SYSCALL_SIZE]; + __u16 svc_insn; struct siginfo info; - struct ucontext uc; -} rt_sigframe; + struct ucontext_extended uc; +}; + +/* Store registers needed to create the signal frame */ +static void store_sigregs(void) +{ + save_access_regs(current->thread.acrs); + save_fp_ctl(¤t->thread.fp_regs.fpc); +#ifdef CONFIG_64BIT + if (current->thread.vxrs) { + int i; + + save_vx_regs(current->thread.vxrs); + for (i = 0; i < __NUM_FPRS; i++) + current->thread.fp_regs.fprs[i] = + *(freg_t *)(current->thread.vxrs + i); + } else +#endif + save_fp_regs(current->thread.fp_regs.fprs); +} + +/* Load registers after signal return */ +static void load_sigregs(void) +{ + restore_access_regs(current->thread.acrs); + /* restore_fp_ctl is done in restore_sigregs */ +#ifdef CONFIG_64BIT + if (current->thread.vxrs) { + int i; + + for (i = 0; i < __NUM_FPRS; i++) + *(freg_t *)(current->thread.vxrs + i) = + current->thread.fp_regs.fprs[i]; + restore_vx_regs(current->thread.vxrs); + } else +#endif + restore_fp_regs(current->thread.fp_regs.fprs); +} /* Returns non-zero on fault. */ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { _sigregs user_sregs; - save_access_regs(current->thread.acrs); - /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ user_sregs.regs.psw.mask = PSW_USER_BITS | @@ -63,12 +150,6 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, sizeof(user_sregs.regs.acrs)); - /* - * We have to store the fp registers to current->thread.fp_regs - * to merge them with the emulated registers. - */ - save_fp_ctl(¤t->thread.fp_regs.fpc); - save_fp_regs(current->thread.fp_regs.fprs); memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, sizeof(user_sregs.fpregs)); if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs))) @@ -107,20 +188,64 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, sizeof(current->thread.acrs)); - restore_access_regs(current->thread.acrs); memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, sizeof(current->thread.fp_regs)); - restore_fp_regs(current->thread.fp_regs.fprs); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; } +/* Returns non-zero on fault. */ +static int save_sigregs_ext(struct pt_regs *regs, + _sigregs_ext __user *sregs_ext) +{ +#ifdef CONFIG_64BIT + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + /* Save vector registers to signal stack */ + if (current->thread.vxrs) { + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1); + if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, + sizeof(sregs_ext->vxrs_low)) || + __copy_to_user(&sregs_ext->vxrs_high, + current->thread.vxrs + __NUM_VXRS_LOW, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + } +#endif + return 0; +} + +static int restore_sigregs_ext(struct pt_regs *regs, + _sigregs_ext __user *sregs_ext) +{ +#ifdef CONFIG_64BIT + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + /* Restore vector registers from signal stack */ + if (current->thread.vxrs) { + if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, + sizeof(sregs_ext->vxrs_low)) || + __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW, + &sregs_ext->vxrs_high, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + for (i = 0; i < __NUM_VXRS_LOW; i++) + *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i]; + } +#endif + return 0; +} + SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); - sigframe __user *frame = (sigframe __user *)regs->gprs[15]; + struct sigframe __user *frame = + (struct sigframe __user *) regs->gprs[15]; sigset_t set; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) @@ -128,6 +253,9 @@ SYSCALL_DEFINE0(sigreturn) set_current_blocked(&set); if (restore_sigregs(regs, &frame->sregs)) goto badframe; + if (restore_sigregs_ext(regs, &frame->sregs_ext)) + goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: force_sig(SIGSEGV, current); @@ -137,16 +265,20 @@ badframe: SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); - rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15]; + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *)regs->gprs[15]; sigset_t set; if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; set_current_blocked(&set); + if (restore_altstack(&frame->uc.uc_stack)) + goto badframe; if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; - if (restore_altstack(&frame->uc.uc_stack)) + if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: force_sig(SIGSEGV, current); @@ -154,11 +286,6 @@ badframe: } /* - * Set up a signal frame. - */ - - -/* * Determine which stack to use.. */ static inline void __user * @@ -195,39 +322,63 @@ static inline int map_signal(int sig) static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs) { - sigframe __user *frame; - - frame = get_sigframe(ka, regs, sizeof(sigframe)); + struct sigframe __user *frame; + struct sigcontext sc; + unsigned long restorer; + size_t frame_size; + /* + * gprs_high are only present for a 31-bit task running on + * a 64-bit kernel (see compat_signal.c) but the space for + * gprs_high need to be allocated if vector registers are + * included in the signal frame on a 31-bit system. + */ + frame_size = sizeof(*frame) - sizeof(frame->sregs_ext); + if (MACHINE_HAS_VX) + frame_size += sizeof(frame->sregs_ext); + frame = get_sigframe(ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; - if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE)) + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (addr_t __user *) frame)) return -EFAULT; + /* Create struct sigcontext on the signal stack */ + memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE); + sc.sregs = (_sigregs __user __force *) &frame->sregs; + if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create _sigregs on the signal stack */ if (save_sigregs(regs, &frame->sregs)) return -EFAULT; - if (__put_user(&frame->sregs, &frame->sc.sregs)) + + /* Place signal number on stack to allow backtrace from handler. */ + if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + return -EFAULT; + + /* Create _sigregs_ext on the signal stack */ + if (save_sigregs_ext(regs, &frame->sregs_ext)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (unsigned long) - ka->sa.sa_restorer | PSW_ADDR_AMODE; + restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE; } else { - regs->gprs[14] = (unsigned long) - frame->retcode | PSW_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __user *)(frame->retcode))) + /* Signal frame without vector registers are short ! */ + __u16 __user *svc = (void *) frame + frame_size - 2; + if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) return -EFAULT; + restorer = (unsigned long) svc | PSW_ADDR_AMODE; } - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (addr_t __user *) frame)) - return -EFAULT; - /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (unsigned long) frame; /* Force default amode and default user address space control. */ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | @@ -247,54 +398,69 @@ static int setup_frame(int sig, struct k_sigaction *ka, regs->gprs[5] = regs->int_parm_long; regs->gprs[6] = task_thread_info(current)->last_break; } - - /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) - return -EFAULT; return 0; } static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - int err = 0; - rt_sigframe __user *frame; - - frame = get_sigframe(&ksig->ka, regs, sizeof(rt_sigframe)); + struct rt_sigframe __user *frame; + unsigned long uc_flags, restorer; + size_t frame_size; + frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext); + /* + * gprs_high are only present for a 31-bit task running on + * a 64-bit kernel (see compat_signal.c) but the space for + * gprs_high need to be allocated if vector registers are + * included in the signal frame on a 31-bit system. + */ + uc_flags = 0; +#ifdef CONFIG_64BIT + if (MACHINE_HAS_VX) { + frame_size += sizeof(_sigregs_ext); + if (current->thread.vxrs) + uc_flags |= UC_VXRS; + } +#endif + frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; - if (copy_siginfo_to_user(&frame->info, &ksig->info)) - return -EFAULT; - - /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(NULL, &frame->uc.uc_link); - err |= __save_altstack(&frame->uc.uc_stack, regs->gprs[15]); - err |= save_sigregs(regs, &frame->uc.uc_mcontext); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (addr_t __user *) frame)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (unsigned long) + restorer = (unsigned long) ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE; } else { - regs->gprs[14] = (unsigned long) - frame->retcode | PSW_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode))) + __u16 __user *svc = &frame->svc_insn; + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) return -EFAULT; + restorer = (unsigned long) svc | PSW_ADDR_AMODE; } - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (addr_t __user *) frame)) + /* Create siginfo on the signal stack */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create ucontext on the signal stack. */ + if (__put_user(uc_flags, &frame->uc.uc_flags) || + __put_user(NULL, &frame->uc.uc_link) || + __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || + save_sigregs(regs, &frame->uc.uc_mcontext) || + __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (unsigned long) frame; /* Force default amode and default user address space control. */ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 243c7e512600..6fd9e60101f1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -45,6 +45,7 @@ #include <asm/debug.h> #include <asm/os_info.h> #include <asm/sigp.h> +#include <asm/idle.h> #include "entry.h" enum { @@ -82,7 +83,8 @@ DEFINE_MUTEX(smp_cpu_state_mutex); /* * Signal processor helper functions. */ -static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) +static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm, + u32 *status) { int cc; @@ -178,6 +180,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) goto out; } #else + if (MACHINE_HAS_VX) + lc->vector_save_area_addr = + (unsigned long) &lc->vector_save_area; if (vdso_alloc_per_cpu(lc)) goto out; #endif @@ -333,12 +338,6 @@ int smp_vcpu_scheduled(int cpu) return pcpu_running(pcpu_devices + cpu); } -void smp_yield(void) -{ - if (MACHINE_HAS_DIAG44) - asm volatile("diag 0,0,0x44"); -} - void smp_yield_cpu(int cpu) { if (MACHINE_HAS_DIAG9C) @@ -517,35 +516,53 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); static void __init smp_get_save_area(int cpu, u16 address) { void *lc = pcpu_devices[0].lowcore; - struct save_area *save_area; + struct save_area_ext *sa_ext; + unsigned long vx_sa; if (is_kdump_kernel()) return; if (!OLDMEM_BASE && (address == boot_cpu_address || ipl_info.type != IPL_TYPE_FCP_DUMP)) return; - save_area = dump_save_area_create(cpu); - if (!save_area) + sa_ext = dump_save_area_create(cpu); + if (!sa_ext) panic("could not allocate memory for save area\n"); if (address == boot_cpu_address) { /* Copy the registers of the boot cpu. */ - copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa), SAVE_AREA_BASE - PAGE_SIZE, 0); + if (MACHINE_HAS_VX) + save_vx_regs_safe(sa_ext->vx_regs); return; } /* Get the registers of a non-boot cpu. */ __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL); - memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area)); + memcpy_real(&sa_ext->sa, lc + SAVE_AREA_BASE, sizeof(sa_ext->sa)); + if (!MACHINE_HAS_VX) + return; + /* Get the VX registers */ + vx_sa = __get_free_page(GFP_KERNEL); + if (!vx_sa) + panic("could not allocate memory for VX save area\n"); + __pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL); + memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs)); + free_page(vx_sa); } int smp_store_status(int cpu) { + unsigned long vx_sa; struct pcpu *pcpu; pcpu = pcpu_devices + cpu; if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS, 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; + if (!MACHINE_HAS_VX) + return 0; + vx_sa = __pa(pcpu->lowcore->vector_save_area_addr); + __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, + vx_sa, NULL); return 0; } @@ -667,7 +684,7 @@ static void smp_start_secondary(void *cpuvoid) cpu_init(); preempt_disable(); init_cpu_timer(); - init_cpu_vtimer(); + vtime_init(); pfault_init(); notify_cpu_starting(smp_processor_id()); set_cpu_online(smp_processor_id(), true); @@ -726,6 +743,7 @@ int __cpu_disable(void) cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ __ctl_load(cregs, 0, 15); + clear_cpu_flag(CIF_NOHZ_DELAY); return 0; } @@ -898,42 +916,6 @@ static struct attribute_group cpu_common_attr_group = { .attrs = cpu_common_attrs, }; -static ssize_t show_idle_count(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long idle_count; - unsigned int sequence; - - do { - sequence = ACCESS_ONCE(idle->sequence); - idle_count = ACCESS_ONCE(idle->idle_count); - if (ACCESS_ONCE(idle->clock_idle_enter)) - idle_count++; - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); - return sprintf(buf, "%llu\n", idle_count); -} -static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); - -static ssize_t show_idle_time(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long now, idle_time, idle_enter, idle_exit; - unsigned int sequence; - - do { - now = get_tod_clock(); - sequence = ACCESS_ONCE(idle->sequence); - idle_time = ACCESS_ONCE(idle->idle_time); - idle_enter = ACCESS_ONCE(idle->clock_idle_enter); - idle_exit = ACCESS_ONCE(idle->clock_idle_exit); - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); - idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; - return sprintf(buf, "%llu\n", idle_time >> 12); -} -static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); - static struct attribute *cpu_online_attrs[] = { &dev_attr_idle_count.attr, &dev_attr_idle_time_us.attr, diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 4cef607f3711..69e980de0f62 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -232,6 +232,19 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtom_clock_nsec -= nsecps; vdso_data->wtom_clock_sec++; } + + vdso_data->xtime_coarse_sec = tk->xtime_sec; + vdso_data->xtime_coarse_nsec = + (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); + vdso_data->wtom_coarse_sec = + vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec; + vdso_data->wtom_coarse_nsec = + vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec; + while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) { + vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC; + vdso_data->wtom_coarse_sec++; + } + vdso_data->tk_mult = tk->tkr.mult; vdso_data->tk_shift = tk->tkr.shift; smp_wmb(); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 355a16c55702..b93bed76ea94 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -464,15 +464,17 @@ static struct sched_domain_topology_level s390_topology[] = { static int __init topology_init(void) { - if (!MACHINE_HAS_TOPOLOGY) { + if (MACHINE_HAS_TOPOLOGY) + set_topology_timer(); + else topology_update_polarization_simple(); - goto out; - } - set_topology_timer(); -out: - - set_sched_topology(s390_topology); - return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); } device_initcall(topology_init); + +static int __init early_topology_init(void) +{ + set_sched_topology(s390_topology); + return 0; +} +early_initcall(early_topology_init); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index c5762324d9ee..9ff5ecba26ab 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -18,6 +18,8 @@ #include <linux/ptrace.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/slab.h> +#include <asm/switch_to.h> #include "entry.h" int show_unhandled_signals = 1; @@ -58,15 +60,10 @@ int is_valid_bugaddr(unsigned long addr) return 1; } -static void __kprobes do_trap(struct pt_regs *regs, - int si_signo, int si_code, char *str) +void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { siginfo_t info; - if (notify_die(DIE_TRAP, str, regs, 0, - regs->int_code, si_signo) == NOTIFY_STOP) - return; - if (user_mode(regs)) { info.si_signo = si_signo; info.si_errno = 0; @@ -90,6 +87,15 @@ static void __kprobes do_trap(struct pt_regs *regs, } } +static void __kprobes do_trap(struct pt_regs *regs, int si_signo, int si_code, + char *str) +{ + if (notify_die(DIE_TRAP, str, regs, 0, + regs->int_code, si_signo) == NOTIFY_STOP) + return; + do_report_trap(regs, si_signo, si_code, str); +} + void __kprobes do_per_trap(struct pt_regs *regs) { siginfo_t info; @@ -178,6 +184,7 @@ void __kprobes illegal_op(struct pt_regs *regs) siginfo_t info; __u8 opcode[6]; __u16 __user *location; + int is_uprobe_insn = 0; int signal = 0; location = get_trap_ip(regs); @@ -194,6 +201,10 @@ void __kprobes illegal_op(struct pt_regs *regs) force_sig_info(SIGTRAP, &info, current); } else signal = SIGILL; +#ifdef CONFIG_UPROBES + } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) { + is_uprobe_insn = 1; +#endif #ifdef CONFIG_MATHEMU } else if (opcode[0] == 0xb3) { if (get_user(*((__u16 *) (opcode+2)), location+1)) @@ -219,11 +230,13 @@ void __kprobes illegal_op(struct pt_regs *regs) #endif } else signal = SIGILL; - } else { - /* - * If we get an illegal op in kernel mode, send it through the - * kprobes notifier. If kprobes doesn't pick it up, SIGILL - */ + } + /* + * We got either an illegal op in kernel mode, or user space trapped + * on a uprobes illegal instruction. See if kprobes or uprobes picks + * it up. If not, SIGILL. + */ + if (is_uprobe_insn || !user_mode(regs)) { if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; @@ -292,6 +305,74 @@ DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); #endif +#ifdef CONFIG_64BIT +int alloc_vector_registers(struct task_struct *tsk) +{ + __vector128 *vxrs; + int i; + + /* Allocate vector register save area. */ + vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS, + GFP_KERNEL|__GFP_REPEAT); + if (!vxrs) + return -ENOMEM; + preempt_disable(); + if (tsk == current) + save_fp_regs(tsk->thread.fp_regs.fprs); + /* Copy the 16 floating point registers */ + for (i = 0; i < 16; i++) + *(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i]; + tsk->thread.vxrs = vxrs; + if (tsk == current) { + __ctl_set_bit(0, 17); + restore_vx_regs(vxrs); + } + preempt_enable(); + return 0; +} + +void vector_exception(struct pt_regs *regs) +{ + int si_code, vic; + + if (!MACHINE_HAS_VX) { + do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation"); + return; + } + + /* get vector interrupt code from fpc */ + asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); + vic = (current->thread.fp_regs.fpc & 0xf00) >> 8; + switch (vic) { + case 1: /* invalid vector operation */ + si_code = FPE_FLTINV; + break; + case 2: /* division by zero */ + si_code = FPE_FLTDIV; + break; + case 3: /* overflow */ + si_code = FPE_FLTOVF; + break; + case 4: /* underflow */ + si_code = FPE_FLTUND; + break; + case 5: /* inexact */ + si_code = FPE_FLTRES; + break; + default: /* unknown cause */ + si_code = 0; + } + do_trap(regs, SIGFPE, si_code, "vector exception"); +} + +static int __init disable_vector_extension(char *str) +{ + S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; + return 1; +} +__setup("novx", disable_vector_extension); +#endif + void data_exception(struct pt_regs *regs) { __u16 __user *location; @@ -357,6 +438,18 @@ void data_exception(struct pt_regs *regs) } } #endif +#ifdef CONFIG_64BIT + /* Check for vector register enablement */ + if (MACHINE_HAS_VX && !current->thread.vxrs && + (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) { + alloc_vector_registers(current); + /* Vector data exception is suppressing, rewind psw. */ + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); + clear_pt_regs_flag(regs, PIF_PER_TRAP); + return; + } +#endif + if (current->thread.fp_regs.fpc & FPC_DXC_MASK) signal = SIGFPE; else diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c new file mode 100644 index 000000000000..956f4f7a591c --- /dev/null +++ b/arch/s390/kernel/uprobes.c @@ -0,0 +1,332 @@ +/* + * User-space Probes (UProbes) for s390 + * + * Copyright IBM Corp. 2014 + * Author(s): Jan Willeke, + */ + +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/uprobes.h> +#include <linux/compat.h> +#include <linux/kdebug.h> +#include <asm/switch_to.h> +#include <asm/facility.h> +#include <asm/dis.h> +#include "entry.h" + +#define UPROBE_TRAP_NR UINT_MAX + +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long addr) +{ + return probe_is_prohibited_opcode(auprobe->insn); +} + +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if (psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) + return -EINVAL; + if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) + return -EINVAL; + clear_pt_regs_flag(regs, PIF_PER_TRAP); + auprobe->saved_per = psw_bits(regs->psw).r; + auprobe->saved_int_code = regs->int_code; + regs->int_code = UPROBE_TRAP_NR; + regs->psw.addr = current->utask->xol_vaddr; + set_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP); + update_cr_regs(current); + return 0; +} + +bool arch_uprobe_xol_was_trapped(struct task_struct *tsk) +{ + struct pt_regs *regs = task_pt_regs(tsk); + + if (regs->int_code != UPROBE_TRAP_NR) + return true; + return false; +} + +int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + int fixup = probe_get_fixup_type(auprobe->insn); + struct uprobe_task *utask = current->utask; + + clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP); + update_cr_regs(current); + psw_bits(regs->psw).r = auprobe->saved_per; + regs->int_code = auprobe->saved_int_code; + + if (fixup & FIXUP_PSW_NORMAL) + regs->psw.addr += utask->vaddr - utask->xol_vaddr; + if (fixup & FIXUP_RETURN_REGISTER) { + int reg = (auprobe->insn[0] & 0xf0) >> 4; + + regs->gprs[reg] += utask->vaddr - utask->xol_vaddr; + } + if (fixup & FIXUP_BRANCH_NOT_TAKEN) { + int ilen = insn_length(auprobe->insn[0] >> 8); + + if (regs->psw.addr - utask->xol_vaddr == ilen) + regs->psw.addr = utask->vaddr + ilen; + } + /* If per tracing was active generate trap */ + if (regs->psw.mask & PSW_MASK_PER) + do_per_trap(regs); + return 0; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + if (!user_mode(regs)) + return NOTIFY_DONE; + if (regs->int_code & 0x200) /* Trap during transaction */ + return NOTIFY_DONE; + switch (val) { + case DIE_BPT: + if (uprobe_pre_sstep_notifier(regs)) + return NOTIFY_STOP; + break; + case DIE_SSTEP: + if (uprobe_post_sstep_notifier(regs)) + return NOTIFY_STOP; + default: + break; + } + return NOTIFY_DONE; +} + +void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + clear_thread_flag(TIF_UPROBE_SINGLESTEP); + regs->int_code = auprobe->saved_int_code; + regs->psw.addr = current->utask->vaddr; +} + +unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline, + struct pt_regs *regs) +{ + unsigned long orig; + + orig = regs->gprs[14]; + regs->gprs[14] = trampoline; + return orig; +} + +/* Instruction Emulation */ + +static void adjust_psw_addr(psw_t *psw, unsigned long len) +{ + psw->addr = __rewind_psw(*psw, -len); +} + +#define EMU_ILLEGAL_OP 1 +#define EMU_SPECIFICATION 2 +#define EMU_ADDRESSING 3 + +#define emu_load_ril(ptr, output) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(*(ptr)) input; \ + int __rc = 0; \ + \ + if (!test_facility(34)) \ + __rc = EMU_ILLEGAL_OP; \ + else if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (get_user(input, ptr)) \ + __rc = EMU_ADDRESSING; \ + else \ + *(output) = input; \ + __rc; \ +}) + +#define emu_store_ril(ptr, input) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + int __rc = 0; \ + \ + if (!test_facility(34)) \ + __rc = EMU_ILLEGAL_OP; \ + else if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (put_user(*(input), ptr)) \ + __rc = EMU_ADDRESSING; \ + __rc; \ +}) + +#define emu_cmp_ril(regs, ptr, cmp) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(*(ptr)) input; \ + int __rc = 0; \ + \ + if (!test_facility(34)) \ + __rc = EMU_ILLEGAL_OP; \ + else if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (get_user(input, ptr)) \ + __rc = EMU_ADDRESSING; \ + else if (input > *(cmp)) \ + psw_bits((regs)->psw).cc = 1; \ + else if (input < *(cmp)) \ + psw_bits((regs)->psw).cc = 2; \ + else \ + psw_bits((regs)->psw).cc = 0; \ + __rc; \ +}) + +struct insn_ril { + u8 opc0; + u8 reg : 4; + u8 opc1 : 4; + s32 disp; +} __packed; + +union split_register { + u64 u64; + u32 u32[2]; + u16 u16[4]; + s64 s64; + s32 s32[2]; + s16 s16[4]; +}; + +/* + * pc relative instructions are emulated, since parameters may not be + * accessible from the xol area due to range limitations. + */ +static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + union split_register *rx; + struct insn_ril *insn; + unsigned int ilen; + void *uptr; + int rc = 0; + + insn = (struct insn_ril *) &auprobe->insn; + rx = (union split_register *) ®s->gprs[insn->reg]; + uptr = (void *)(regs->psw.addr + (insn->disp * 2)); + ilen = insn_length(insn->opc0); + + switch (insn->opc0) { + case 0xc0: + switch (insn->opc1) { + case 0x00: /* larl */ + rx->u64 = (unsigned long)uptr; + break; + } + break; + case 0xc4: + switch (insn->opc1) { + case 0x02: /* llhrl */ + rc = emu_load_ril((u16 __user *)uptr, &rx->u32[1]); + break; + case 0x04: /* lghrl */ + rc = emu_load_ril((s16 __user *)uptr, &rx->u64); + break; + case 0x05: /* lhrl */ + rc = emu_load_ril((s16 __user *)uptr, &rx->u32[1]); + break; + case 0x06: /* llghrl */ + rc = emu_load_ril((u16 __user *)uptr, &rx->u64); + break; + case 0x08: /* lgrl */ + rc = emu_load_ril((u64 __user *)uptr, &rx->u64); + break; + case 0x0c: /* lgfrl */ + rc = emu_load_ril((s32 __user *)uptr, &rx->u64); + break; + case 0x0d: /* lrl */ + rc = emu_load_ril((u32 __user *)uptr, &rx->u32[1]); + break; + case 0x0e: /* llgfrl */ + rc = emu_load_ril((u32 __user *)uptr, &rx->u64); + break; + case 0x07: /* sthrl */ + rc = emu_store_ril((u16 __user *)uptr, &rx->u16[3]); + break; + case 0x0b: /* stgrl */ + rc = emu_store_ril((u64 __user *)uptr, &rx->u64); + break; + case 0x0f: /* strl */ + rc = emu_store_ril((u32 __user *)uptr, &rx->u32[1]); + break; + } + break; + case 0xc6: + switch (insn->opc1) { + case 0x02: /* pfdrl */ + if (!test_facility(34)) + rc = EMU_ILLEGAL_OP; + break; + case 0x04: /* cghrl */ + rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64); + break; + case 0x05: /* chrl */ + rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s32[1]); + break; + case 0x06: /* clghrl */ + rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u64); + break; + case 0x07: /* clhrl */ + rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u32[1]); + break; + case 0x08: /* cgrl */ + rc = emu_cmp_ril(regs, (s64 __user *)uptr, &rx->s64); + break; + case 0x0a: /* clgrl */ + rc = emu_cmp_ril(regs, (u64 __user *)uptr, &rx->u64); + break; + case 0x0c: /* cgfrl */ + rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s64); + break; + case 0x0d: /* crl */ + rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s32[1]); + break; + case 0x0e: /* clgfrl */ + rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u64); + break; + case 0x0f: /* clrl */ + rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u32[1]); + break; + } + break; + } + adjust_psw_addr(®s->psw, ilen); + switch (rc) { + case EMU_ILLEGAL_OP: + regs->int_code = ilen << 16 | 0x0001; + do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL); + break; + case EMU_SPECIFICATION: + regs->int_code = ilen << 16 | 0x0006; + do_report_trap(regs, SIGILL, ILL_ILLOPC , NULL); + break; + case EMU_ADDRESSING: + regs->int_code = ilen << 16 | 0x0005; + do_report_trap(regs, SIGSEGV, SEGV_MAPERR, NULL); + break; + } +} + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if ((psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) || + ((psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) && + !is_compat_task())) { + regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE); + do_report_trap(regs, SIGILL, ILL_ILLADR, NULL); + return true; + } + if (probe_is_insn_relative_long(auprobe->insn)) { + handle_insn_ril(auprobe, regs); + return true; + } + return false; +} diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S index 36aaa25d05da..eca3f001f081 100644 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -19,14 +19,20 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: .cfi_startproc + basr %r1,0 + la %r1,4f-.(%r1) chi %r2,__CLOCK_REALTIME je 0f chi %r2,__CLOCK_MONOTONIC + je 0f + la %r1,5f-4f(%r1) + chi %r2,__CLOCK_REALTIME_COARSE + je 0f + chi %r2,__CLOCK_MONOTONIC_COARSE jne 3f 0: ltr %r3,%r3 jz 2f /* res == NULL */ - basr %r1,0 -1: l %r0,4f-1b(%r1) +1: l %r0,0(%r1) xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */ st %r0,4(%r3) /* store tp->tv_usec */ 2: lhi %r2,0 @@ -35,5 +41,6 @@ __kernel_clock_getres: svc 0 br %r14 4: .long __CLOCK_REALTIME_RES +5: .long __CLOCK_COARSE_RES .cfi_endproc .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index 7cf18f8d4cb4..48c2206a3956 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -21,8 +21,12 @@ __kernel_clock_gettime: .cfi_startproc basr %r5,0 0: al %r5,21f-0b(%r5) /* get &_vdso_data */ + chi %r2,__CLOCK_REALTIME_COARSE + je 10f chi %r2,__CLOCK_REALTIME je 11f + chi %r2,__CLOCK_MONOTONIC_COARSE + je 9f chi %r2,__CLOCK_MONOTONIC jne 19f @@ -30,8 +34,8 @@ __kernel_clock_gettime: 1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 1b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) + stcke 24(%r15) /* Store TOD clock */ + lm %r0,%r1,25(%r15) s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,2f @@ -68,12 +72,32 @@ __kernel_clock_gettime: lhi %r2,0 br %r14 + /* CLOCK_MONOTONIC_COARSE */ +9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 9b + l %r2,__VDSO_WTOM_CRS_SEC+4(%r5) + l %r1,__VDSO_WTOM_CRS_NSEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 9b + j 8b + + /* CLOCK_REALTIME_COARSE */ +10: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 10b + l %r2,__VDSO_XTIME_CRS_SEC+4(%r5) + l %r1,__VDSO_XTIME_CRS_NSEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 10b + j 17f + /* CLOCK_REALTIME */ 11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 11b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) + stcke 24(%r15) /* Store TOD clock */ + lm %r0,%r1,25(%r15) s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,12f diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index fd621a950f7c..60def5f562db 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -29,8 +29,8 @@ __kernel_gettimeofday: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 1b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) + stcke 24(%r15) /* Store TOD clock */ + lm %r0,%r1,25(%r15) s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,3f diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 34deba7c7ed1..c8513deb8c66 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -19,6 +19,12 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: .cfi_startproc + larl %r1,4f + cghi %r2,__CLOCK_REALTIME_COARSE + je 0f + cghi %r2,__CLOCK_MONOTONIC_COARSE + je 0f + larl %r1,3f cghi %r2,__CLOCK_REALTIME je 0f cghi %r2,__CLOCK_MONOTONIC @@ -32,7 +38,6 @@ __kernel_clock_getres: jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ - larl %r1,3f lg %r0,0(%r1) xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ stg %r0,8(%r3) /* store tp->tv_usec */ @@ -42,5 +47,6 @@ __kernel_clock_getres: svc 0 br %r14 3: .quad __CLOCK_REALTIME_RES +4: .quad __CLOCK_COARSE_RES .cfi_endproc .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 3f34e09db5f4..9d9761f8e110 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -20,12 +20,16 @@ __kernel_clock_gettime: .cfi_startproc larl %r5,_vdso_data + cghi %r2,__CLOCK_REALTIME_COARSE + je 4f cghi %r2,__CLOCK_REALTIME je 5f cghi %r2,__CLOCK_THREAD_CPUTIME_ID je 9f cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ je 9f + cghi %r2,__CLOCK_MONOTONIC_COARSE + je 3f cghi %r2,__CLOCK_MONOTONIC jne 12f @@ -33,10 +37,10 @@ __kernel_clock_gettime: 0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 0b - stck 48(%r15) /* Store TOD clock */ + stcke 48(%r15) /* Store TOD clock */ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ lg %r0,__VDSO_WTOM_SEC(%r5) - lg %r1,48(%r15) + lg %r1,49(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ alg %r1,__VDSO_WTOM_NSEC(%r5) @@ -54,13 +58,33 @@ __kernel_clock_gettime: lghi %r2,0 br %r14 + /* CLOCK_MONOTONIC_COARSE */ +3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 3b + lg %r0,__VDSO_WTOM_CRS_SEC(%r5) + lg %r1,__VDSO_WTOM_CRS_NSEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 3b + j 2b + + /* CLOCK_REALTIME_COARSE */ +4: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 4b + lg %r0,__VDSO_XTIME_CRS_SEC(%r5) + lg %r1,__VDSO_XTIME_CRS_NSEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 4b + j 7f + /* CLOCK_REALTIME */ 5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 5b - stck 48(%r15) /* Store TOD clock */ + stcke 48(%r15) /* Store TOD clock */ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - lg %r1,48(%r15) + lg %r1,49(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index d0860d1d0ccc..7a344995a97f 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -28,8 +28,8 @@ __kernel_gettimeofday: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 0b - stck 48(%r15) /* Store TOD clock */ - lg %r1,48(%r15) + stcke 48(%r15) /* Store TOD clock */ + lg %r1,49(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 8c34363d6f1e..416f2a323ba5 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -6,27 +6,18 @@ */ #include <linux/kernel_stat.h> -#include <linux/notifier.h> -#include <linux/kprobes.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/timex.h> #include <linux/types.h> #include <linux/time.h> -#include <linux/cpu.h> -#include <linux/smp.h> -#include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/vtimer.h> #include <asm/vtime.h> -#include <asm/irq.h> -#include "entry.h" static void virt_timer_expire(void); -DEFINE_PER_CPU(struct s390_idle_data, s390_idle); - static LIST_HEAD(virt_timer_list); static DEFINE_SPINLOCK(virt_timer_lock); static atomic64_t virt_timer_current; @@ -152,49 +143,6 @@ void vtime_account_system(struct task_struct *tsk) __attribute__((alias("vtime_account_irq_enter"))); EXPORT_SYMBOL_GPL(vtime_account_system); -void __kprobes vtime_stop_cpu(void) -{ - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); - unsigned long long idle_time; - unsigned long psw_mask; - - trace_hardirqs_on(); - - /* Wait for external, I/O or machine check interrupt. */ - psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - idle->nohz_delay = 0; - - /* Call the assembler magic in entry.S */ - psw_idle(idle, psw_mask); - - /* Account time spent with enabled wait psw loaded as idle time. */ - idle->sequence++; - smp_wmb(); - idle_time = idle->clock_idle_exit - idle->clock_idle_enter; - idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; - idle->idle_time += idle_time; - idle->idle_count++; - account_idle_time(idle_time); - smp_wmb(); - idle->sequence++; -} - -cputime64_t s390_get_idle_time(int cpu) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long long now, idle_enter, idle_exit; - unsigned int sequence; - - do { - now = get_tod_clock(); - sequence = ACCESS_ONCE(idle->sequence); - idle_enter = ACCESS_ONCE(idle->clock_idle_enter); - idle_exit = ACCESS_ONCE(idle->clock_idle_exit); - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); - return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0; -} - /* * Sorted add to a list. List is linear searched until first bigger * element is found. @@ -372,31 +320,8 @@ EXPORT_SYMBOL(del_virt_timer); /* * Start the virtual CPU timer on the current CPU. */ -void init_cpu_vtimer(void) +void vtime_init(void) { /* set initial cpu timer */ set_vtimer(VTIMER_MAX_SLICE); } - -static int s390_nohz_notify(struct notifier_block *self, unsigned long action, - void *hcpu) -{ - struct s390_idle_data *idle; - long cpu = (long) hcpu; - - idle = &per_cpu(s390_idle, cpu); - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DYING: - idle->nohz_delay = 0; - default: - break; - } - return NOTIFY_OK; -} - -void __init vtime_init(void) -{ - /* Enable cpu timer interrupts on the boot cpu. */ - init_cpu_vtimer(); - cpu_notifier(s390_nohz_notify, 0); -} diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index c6d752e8bf28..a01df233856f 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -6,3 +6,5 @@ lib-y += delay.o string.o uaccess.o find.o obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o obj-$(CONFIG_64BIT) += mem64.o lib-$(CONFIG_SMP) += spinlock.o +lib-$(CONFIG_KPROBES) += probes.o +lib-$(CONFIG_UPROBES) += probes.o diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index a9f3d0042d58..16dc42d83f93 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -43,7 +43,7 @@ static void __udelay_disabled(unsigned long long usecs) lockdep_off(); do { set_clock_comparator(end); - vtime_stop_cpu(); + enabled_wait(); } while (get_tod_clock_fast() < end); lockdep_on(); __ctl_load(cr0, 0, 0); @@ -62,7 +62,7 @@ static void __udelay_enabled(unsigned long long usecs) clock_saved = local_tick_disable(); set_clock_comparator(end); } - vtime_stop_cpu(); + enabled_wait(); if (clock_saved) local_tick_enable(clock_saved); } while (get_tod_clock_fast() < end); diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c new file mode 100644 index 000000000000..c5d64a099719 --- /dev/null +++ b/arch/s390/lib/probes.c @@ -0,0 +1,159 @@ +/* + * Common helper functions for kprobes and uprobes + * + * Copyright IBM Corp. 2014 + */ + +#include <linux/kprobes.h> +#include <asm/dis.h> + +int probe_is_prohibited_opcode(u16 *insn) +{ + if (!is_known_insn((unsigned char *)insn)) + return -EINVAL; + switch (insn[0] >> 8) { + case 0x0c: /* bassm */ + case 0x0b: /* bsm */ + case 0x83: /* diag */ + case 0x44: /* ex */ + case 0xac: /* stnsm */ + case 0xad: /* stosm */ + return -EINVAL; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x00: /* exrl */ + return -EINVAL; + } + } + switch (insn[0]) { + case 0x0101: /* pr */ + case 0xb25a: /* bsa */ + case 0xb240: /* bakr */ + case 0xb258: /* bsg */ + case 0xb218: /* pc */ + case 0xb228: /* pt */ + case 0xb98d: /* epsw */ + case 0xe560: /* tbegin */ + case 0xe561: /* tbeginc */ + case 0xb2f8: /* tend */ + return -EINVAL; + } + return 0; +} + +int probe_get_fixup_type(u16 *insn) +{ + /* default fixup method */ + int fixup = FIXUP_PSW_NORMAL; + + switch (insn[0] >> 8) { + case 0x05: /* balr */ + case 0x0d: /* basr */ + fixup = FIXUP_RETURN_REGISTER; + /* if r2 = 0, no branch will be taken */ + if ((insn[0] & 0x0f) == 0) + fixup |= FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x06: /* bctr */ + case 0x07: /* bcr */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x45: /* bal */ + case 0x4d: /* bas */ + fixup = FIXUP_RETURN_REGISTER; + break; + case 0x47: /* bc */ + case 0x46: /* bct */ + case 0x86: /* bxh */ + case 0x87: /* bxle */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x82: /* lpsw */ + fixup = FIXUP_NOT_REQUIRED; + break; + case 0xb2: /* lpswe */ + if ((insn[0] & 0xff) == 0xb2) + fixup = FIXUP_NOT_REQUIRED; + break; + case 0xa7: /* bras */ + if ((insn[0] & 0x0f) == 0x05) + fixup |= FIXUP_RETURN_REGISTER; + break; + case 0xc0: + if ((insn[0] & 0x0f) == 0x05) /* brasl */ + fixup |= FIXUP_RETURN_REGISTER; + break; + case 0xeb: + switch (insn[2] & 0xff) { + case 0x44: /* bxhg */ + case 0x45: /* bxleg */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; + case 0xe3: /* bctg */ + if ((insn[2] & 0xff) == 0x46) + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0xec: + switch (insn[2] & 0xff) { + case 0xe5: /* clgrb */ + case 0xe6: /* cgrb */ + case 0xf6: /* crb */ + case 0xf7: /* clrb */ + case 0xfc: /* cgib */ + case 0xfd: /* cglib */ + case 0xfe: /* cib */ + case 0xff: /* clib */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; + } + return fixup; +} + +int probe_is_insn_relative_long(u16 *insn) +{ + /* Check if we have a RIL-b or RIL-c format instruction which + * we need to modify in order to avoid instruction emulation. */ + switch (insn[0] >> 8) { + case 0xc0: + if ((insn[0] & 0x0f) == 0x00) /* larl */ + return true; + break; + case 0xc4: + switch (insn[0] & 0x0f) { + case 0x02: /* llhrl */ + case 0x04: /* lghrl */ + case 0x05: /* lhrl */ + case 0x06: /* llghrl */ + case 0x07: /* sthrl */ + case 0x08: /* lgrl */ + case 0x0b: /* stgrl */ + case 0x0c: /* lgfrl */ + case 0x0d: /* lrl */ + case 0x0e: /* llgfrl */ + case 0x0f: /* strl */ + return true; + } + break; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x02: /* pfdrl */ + case 0x04: /* cghrl */ + case 0x05: /* chrl */ + case 0x06: /* clghrl */ + case 0x07: /* clhrl */ + case 0x08: /* cgrl */ + case 0x0a: /* clgrl */ + case 0x0c: /* cgfrl */ + case 0x0d: /* crl */ + case 0x0e: /* clgfrl */ + case 0x0f: /* clrl */ + return true; + } + break; + } + return false; +} diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 5b0e445bc3f3..034a35a3e9c1 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -98,17 +98,6 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) } EXPORT_SYMBOL(arch_spin_lock_wait_flags); -void arch_spin_relax(arch_spinlock_t *lp) -{ - unsigned int cpu = lp->lock; - if (cpu != 0) { - if (MACHINE_IS_VM || MACHINE_IS_KVM || - !smp_vcpu_scheduled(~cpu)) - smp_yield_cpu(~cpu); - } -} -EXPORT_SYMBOL(arch_spin_relax); - int arch_spin_trylock_retry(arch_spinlock_t *lp) { int count; @@ -122,15 +111,21 @@ EXPORT_SYMBOL(arch_spin_trylock_retry); void _raw_read_lock_wait(arch_rwlock_t *rw) { - unsigned int old; + unsigned int owner, old; int count = spin_retry; +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + __RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD); +#endif + owner = 0; while (1) { if (count-- <= 0) { - smp_yield(); + if (owner && !smp_vcpu_scheduled(~owner)) + smp_yield_cpu(~owner); count = spin_retry; } old = ACCESS_ONCE(rw->lock); + owner = ACCESS_ONCE(rw->owner); if ((int) old < 0) continue; if (_raw_compare_and_swap(&rw->lock, old, old + 1)) @@ -139,28 +134,6 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) } EXPORT_SYMBOL(_raw_read_lock_wait); -void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) -{ - unsigned int old; - int count = spin_retry; - - local_irq_restore(flags); - while (1) { - if (count-- <= 0) { - smp_yield(); - count = spin_retry; - } - old = ACCESS_ONCE(rw->lock); - if ((int) old < 0) - continue; - local_irq_disable(); - if (_raw_compare_and_swap(&rw->lock, old, old + 1)) - return; - local_irq_restore(flags); - } -} -EXPORT_SYMBOL(_raw_read_lock_wait_flags); - int _raw_read_trylock_retry(arch_rwlock_t *rw) { unsigned int old; @@ -177,46 +150,62 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw) } EXPORT_SYMBOL(_raw_read_trylock_retry); -void _raw_write_lock_wait(arch_rwlock_t *rw) +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + +void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) { - unsigned int old; + unsigned int owner, old; int count = spin_retry; + owner = 0; while (1) { if (count-- <= 0) { - smp_yield(); + if (owner && !smp_vcpu_scheduled(~owner)) + smp_yield_cpu(~owner); count = spin_retry; } old = ACCESS_ONCE(rw->lock); - if (old) - continue; - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) - return; + owner = ACCESS_ONCE(rw->owner); + smp_rmb(); + if ((int) old >= 0) { + prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); + old = prev; + } + if ((old & 0x7fffffff) == 0 && (int) prev >= 0) + break; } } EXPORT_SYMBOL(_raw_write_lock_wait); -void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +void _raw_write_lock_wait(arch_rwlock_t *rw) { - unsigned int old; + unsigned int owner, old, prev; int count = spin_retry; - local_irq_restore(flags); + prev = 0x80000000; + owner = 0; while (1) { if (count-- <= 0) { - smp_yield(); + if (owner && !smp_vcpu_scheduled(~owner)) + smp_yield_cpu(~owner); count = spin_retry; } old = ACCESS_ONCE(rw->lock); - if (old) - continue; - local_irq_disable(); - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) - return; - local_irq_restore(flags); + owner = ACCESS_ONCE(rw->owner); + if ((int) old >= 0 && + _raw_compare_and_swap(&rw->lock, old, old | 0x80000000)) + prev = old; + else + smp_rmb(); + if ((old & 0x7fffffff) == 0 && (int) prev >= 0) + break; } } -EXPORT_SYMBOL(_raw_write_lock_wait_flags); +EXPORT_SYMBOL(_raw_write_lock_wait); + +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ int _raw_write_trylock_retry(arch_rwlock_t *rw) { @@ -233,3 +222,13 @@ int _raw_write_trylock_retry(arch_rwlock_t *rw) return 0; } EXPORT_SYMBOL(_raw_write_trylock_retry); + +void arch_lock_relax(unsigned int cpu) +{ + if (!cpu) + return; + if (MACHINE_IS_LPAR && smp_vcpu_scheduled(~cpu)) + return; + smp_yield_cpu(~cpu); +} +EXPORT_SYMBOL(arch_lock_relax); diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 46d517c3c763..d46cadeda204 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -54,7 +54,6 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level) return; } seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW "); - seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " "); seq_putc(m, '\n'); } @@ -129,7 +128,7 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, } #ifdef CONFIG_64BIT -#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO) +#define _PMD_PROT_MASK _SEGMENT_ENTRY_PROTECT #else #define _PMD_PROT_MASK 0 #endif @@ -157,7 +156,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, } #ifdef CONFIG_64BIT -#define _PUD_PROT_MASK (_REGION3_ENTRY_RO | _REGION3_ENTRY_CO) +#define _PUD_PROT_MASK _REGION3_ENTRY_RO #else #define _PUD_PROT_MASK 0 #endif diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 389bc17934b7..3c80d2e38f03 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -88,7 +88,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) |= pte_page(pte)[1].index; } else - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO; + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; *(pmd_t *) ptep = pmd; } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 8400f494623f..3fef3b299665 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -6,6 +6,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <asm/cacheflush.h> +#include <asm/facility.h> #include <asm/pgtable.h> #include <asm/page.h> @@ -103,27 +104,50 @@ int set_memory_x(unsigned long addr, int numpages) } #ifdef CONFIG_DEBUG_PAGEALLOC + +static void ipte_range(pte_t *pte, unsigned long address, int nr) +{ + int i; + + if (test_facility(13) && IS_ENABLED(CONFIG_64BIT)) { + __ptep_ipte_range(address, nr - 1, pte); + return; + } + for (i = 0; i < nr; i++) { + __ptep_ipte(address, pte); + address += PAGE_SIZE; + pte++; + } +} + void kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long address; + int nr, i, j; pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; - int i; - for (i = 0; i < numpages; i++) { + for (i = 0; i < numpages;) { address = page_to_phys(page + i); pgd = pgd_offset_k(address); pud = pud_offset(pgd, address); pmd = pmd_offset(pud, address); pte = pte_offset_kernel(pmd, address); - if (!enable) { - __ptep_ipte(address, pte); - pte_val(*pte) = _PAGE_INVALID; - continue; + nr = (unsigned long)pte >> ilog2(sizeof(long)); + nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1)); + nr = min(numpages - i, nr); + if (enable) { + for (j = 0; j < nr; j++) { + pte_val(*pte) = __pa(address); + address += PAGE_SIZE; + pte++; + } + } else { + ipte_range(pte, address, nr); } - pte_val(*pte) = __pa(address); + i += nr; } } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index fdbd7888cb07..b1593c2f751a 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -236,8 +236,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) if (!new_page) goto out; pmd_val(*pm_dir) = __pa(new_page) | - _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | - _SEGMENT_ENTRY_CO; + _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE; address = (address + PMD_SIZE) & PMD_MASK; continue; } @@ -253,9 +252,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pt_dir = pte_offset_kernel(pm_dir, address); if (pte_none(*pt_dir)) { - unsigned long new_page; + void *new_page; - new_page =__pa(vmem_alloc_pages(0)); + new_page = vmemmap_alloc_block(PAGE_SIZE, node); if (!new_page) goto out; pte_val(*pt_dir) = @@ -263,7 +262,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) } address += PAGE_SIZE; } - memset((void *)start, 0, end - start); ret = 0; out: return ret; diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common index 6915d28cf118..87bc86821bc9 100644 --- a/arch/um/Kconfig.common +++ b/arch/um/Kconfig.common @@ -39,7 +39,8 @@ config LOCKDEP_SUPPORT config STACKTRACE_SUPPORT bool - default n + default y + select STACKTRACE config GENERIC_CALIBRATE_DELAY bool diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 7d26d9c0b2fb..f70dd540655d 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -659,10 +659,6 @@ static int __init eth_setup(char *str) } new = alloc_bootmem(sizeof(*new)); - if (new == NULL) { - printk(KERN_ERR "eth_init : alloc_bootmem failed\n"); - return 1; - } INIT_LIST_HEAD(&new->list); new->index = n; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 3716e6952554..e8ab93c3e638 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1277,7 +1277,7 @@ static void do_ubd_request(struct request_queue *q) while(1){ struct ubd *dev = q->queuedata; - if(dev->end_sg == 0){ + if(dev->request == NULL){ struct request *req = blk_fetch_request(q); if(req == NULL) return; @@ -1299,7 +1299,8 @@ static void do_ubd_request(struct request_queue *q) return; } prepare_flush_request(req, io_req); - submit_request(io_req, dev); + if (submit_request(io_req, dev) == false) + return; } while(dev->start_sg < dev->end_sg){ diff --git a/arch/um/include/asm/stacktrace.h b/arch/um/include/asm/stacktrace.h new file mode 100644 index 000000000000..9a864328c67f --- /dev/null +++ b/arch/um/include/asm/stacktrace.h @@ -0,0 +1,42 @@ +#ifndef _ASM_UML_STACKTRACE_H +#define _ASM_UML_STACKTRACE_H + +#include <linux/uaccess.h> +#include <linux/ptrace.h> + +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; + +struct stacktrace_ops { + void (*address)(void *data, unsigned long address, int reliable); +}; + +#ifdef CONFIG_FRAME_POINTER +static inline unsigned long +get_frame_pointer(struct task_struct *task, struct pt_regs *segv_regs) +{ + if (!task || task == current) + return segv_regs ? PT_REGS_BP(segv_regs) : current_bp(); + return KSTK_EBP(task); +} +#else +static inline unsigned long +get_frame_pointer(struct task_struct *task, struct pt_regs *segv_regs) +{ + return 0; +} +#endif + +static inline unsigned long +*get_stack_pointer(struct task_struct *task, struct pt_regs *segv_regs) +{ + if (!task || task == current) + return segv_regs ? (unsigned long *)PT_REGS_SP(segv_regs) : current_sp(); + return (unsigned long *)KSTK_ESP(task); +} + +void dump_trace(struct task_struct *tsk, const struct stacktrace_ops *ops, void *data); + +#endif /* _ASM_UML_STACKTRACE_H */ diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h index 46384acd547b..cb84414e3e66 100644 --- a/arch/um/include/shared/mem_user.h +++ b/arch/um/include/shared/mem_user.h @@ -49,7 +49,7 @@ extern int iomem_size; extern int init_mem_user(void); extern void setup_memory(void *entry); extern unsigned long find_iomem(char *driver, unsigned long *len_out); -extern int init_maps(unsigned long physmem, unsigned long iomem, +extern void mem_total_pages(unsigned long physmem, unsigned long iomem, unsigned long highmem); extern unsigned long get_vm(unsigned long len); extern void setup_physmem(unsigned long start, unsigned long usable, diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index d8b78a03855c..2d840a070c8b 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_GCOV) += gmon_syms.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o USER_OBJS := config.o diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 30fdd5d0067b..549ecf3f5857 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -22,39 +22,19 @@ EXPORT_SYMBOL(high_physmem); extern unsigned long long physmem_size; -int __init init_maps(unsigned long physmem, unsigned long iomem, +void __init mem_total_pages(unsigned long physmem, unsigned long iomem, unsigned long highmem) { - struct page *p, *map; - unsigned long phys_len, phys_pages, highmem_len, highmem_pages; - unsigned long iomem_len, iomem_pages, total_len, total_pages; - int i; - - phys_pages = physmem >> PAGE_SHIFT; - phys_len = phys_pages * sizeof(struct page); - - iomem_pages = iomem >> PAGE_SHIFT; - iomem_len = iomem_pages * sizeof(struct page); + unsigned long phys_pages, highmem_pages; + unsigned long iomem_pages, total_pages; + phys_pages = physmem >> PAGE_SHIFT; + iomem_pages = iomem >> PAGE_SHIFT; highmem_pages = highmem >> PAGE_SHIFT; - highmem_len = highmem_pages * sizeof(struct page); - - total_pages = phys_pages + iomem_pages + highmem_pages; - total_len = phys_len + iomem_len + highmem_len; - map = alloc_bootmem_low_pages(total_len); - if (map == NULL) - return -ENOMEM; - - for (i = 0; i < total_pages; i++) { - p = &map[i]; - memset(p, 0, sizeof(struct page)); - SetPageReserved(p); - INIT_LIST_HEAD(&p->lru); - } + total_pages = phys_pages + iomem_pages + highmem_pages; max_mapnr = total_pages; - return 0; } void map_memory(unsigned long virt, unsigned long phys, unsigned long len, diff --git a/arch/um/kernel/stacktrace.c b/arch/um/kernel/stacktrace.c new file mode 100644 index 000000000000..ebe7bcf62684 --- /dev/null +++ b/arch/um/kernel/stacktrace.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2013 Richard Weinberger <richard@nod.at> + * Copyright (C) 2014 Google Inc., Author: Daniel Walter <dwalter@google.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kallsyms.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/stacktrace.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <asm/stacktrace.h> + +void dump_trace(struct task_struct *tsk, + const struct stacktrace_ops *ops, + void *data) +{ + int reliable = 0; + unsigned long *sp, bp, addr; + struct pt_regs *segv_regs = tsk->thread.segv_regs; + struct stack_frame *frame; + + bp = get_frame_pointer(tsk, segv_regs); + sp = get_stack_pointer(tsk, segv_regs); + + frame = (struct stack_frame *)bp; + while (((long) sp & (THREAD_SIZE-1)) != 0) { + addr = *sp; + if (__kernel_text_address(addr)) { + reliable = 0; + if ((unsigned long) sp == bp + sizeof(long)) { + frame = frame ? frame->next_frame : NULL; + bp = (unsigned long)frame; + reliable = 1; + } + ops->address(data, addr, reliable); + } + sp++; + } +} + +static void save_addr(void *data, unsigned long address, int reliable) +{ + struct stack_trace *trace = data; + + if (!reliable) + return; + if (trace->nr_entries >= trace->max_entries) + return; + + trace->entries[trace->nr_entries++] = address; +} + +static const struct stacktrace_ops dump_ops = { + .address = save_addr +}; + +static void __save_stack_trace(struct task_struct *tsk, struct stack_trace *trace) +{ + dump_trace(tsk, &dump_ops, trace); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + +void save_stack_trace(struct stack_trace *trace) +{ + __save_stack_trace(current, trace); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + __save_stack_trace(tsk, trace); +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 799d7e413bf5..894c8d303cda 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -12,57 +12,20 @@ #include <linux/module.h> #include <linux/sched.h> #include <asm/sysrq.h> +#include <asm/stacktrace.h> #include <os.h> -struct stack_frame { - struct stack_frame *next_frame; - unsigned long return_address; -}; - -static void do_stack_trace(unsigned long *sp, unsigned long bp) +static void _print_addr(void *data, unsigned long address, int reliable) { - int reliable; - unsigned long addr; - struct stack_frame *frame = (struct stack_frame *)bp; - - printk(KERN_INFO "Call Trace:\n"); - while (((long) sp & (THREAD_SIZE-1)) != 0) { - addr = *sp; - if (__kernel_text_address(addr)) { - reliable = 0; - if ((unsigned long) sp == bp + sizeof(long)) { - frame = frame ? frame->next_frame : NULL; - bp = (unsigned long)frame; - reliable = 1; - } - - printk(KERN_INFO " [<%08lx>]", addr); - printk(KERN_CONT " %s", reliable ? "" : "? "); - print_symbol(KERN_CONT "%s", addr); - printk(KERN_CONT "\n"); - } - sp++; - } - printk(KERN_INFO "\n"); + pr_info(" [<%08lx>]", address); + pr_cont(" %s", reliable ? "" : "? "); + print_symbol("%s", address); + pr_cont("\n"); } -static unsigned long get_frame_pointer(struct task_struct *task, - struct pt_regs *segv_regs) -{ - if (!task || task == current) - return segv_regs ? PT_REGS_BP(segv_regs) : current_bp(); - else - return KSTK_EBP(task); -} - -static unsigned long *get_stack_pointer(struct task_struct *task, - struct pt_regs *segv_regs) -{ - if (!task || task == current) - return segv_regs ? (unsigned long *)PT_REGS_SP(segv_regs) : current_sp(); - else - return (unsigned long *)KSTK_ESP(task); -} +static const struct stacktrace_ops stackops = { + .address = _print_addr +}; void show_stack(struct task_struct *task, unsigned long *stack) { @@ -71,7 +34,7 @@ void show_stack(struct task_struct *task, unsigned long *stack) int i; if (!segv_regs && os_is_signal_stack()) { - printk(KERN_ERR "Received SIGSEGV in SIGSEGV handler," + pr_err("Received SIGSEGV in SIGSEGV handler," " aborting stack trace!\n"); return; } @@ -83,16 +46,18 @@ void show_stack(struct task_struct *task, unsigned long *stack) if (!stack) sp = get_stack_pointer(task, segv_regs); - printk(KERN_INFO "Stack:\n"); + pr_info("Stack:\n"); stack = sp; for (i = 0; i < 3 * STACKSLOTS_PER_LINE; i++) { if (kstack_end(stack)) break; if (i && ((i % STACKSLOTS_PER_LINE) == 0)) - printk(KERN_CONT "\n"); - printk(KERN_CONT " %08lx", *stack++); + pr_cont("\n"); + pr_cont(" %08lx", *stack++); } - printk(KERN_CONT "\n"); + pr_cont("\n"); - do_stack_trace(sp, bp); + pr_info("Call Trace:\n"); + dump_trace(current, &stackops, NULL); + pr_info("\n"); } diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 016adf0985d5..9274eae6ae7b 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -348,12 +348,7 @@ int __init linux_main(int argc, char **argv) start_vm = VMALLOC_START; setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); - if (init_maps(physmem_size, iomem_size, highmem)) { - printf("Failed to allocate mem_map for %Lu bytes of physical " - "memory and %Lu bytes of highmem\n", physmem_size, - highmem); - exit(1); - } + mem_total_pages(physmem_size, iomem_size, highmem); virtmem_size = physmem_size; stack = (unsigned long) argv; diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore index 7cab8c08e6d1..aff152c87cf4 100644 --- a/arch/x86/.gitignore +++ b/arch/x86/.gitignore @@ -1,4 +1,6 @@ boot/compressed/vmlinux tools/test_get_len tools/insn_sanity +purgatory/kexec-purgatory.c +purgatory/purgatory.ro diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3eb8a41509b3..f2327e88e07c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -491,6 +491,36 @@ config X86_INTEL_LPSS things like clock tree (common clock framework) and pincontrol which are needed by the LPSS peripheral drivers. +config IOSF_MBI + tristate "Intel SoC IOSF Sideband support for SoC platforms" + depends on PCI + ---help--- + This option enables sideband register access support for Intel SoC + platforms. On these platforms the IOSF sideband is used in lieu of + MSR's for some register accesses, mostly but not limited to thermal + and power. Drivers may query the availability of this device to + determine if they need the sideband in order to work on these + platforms. The sideband is available on the following SoC products. + This list is not meant to be exclusive. + - BayTrail + - Braswell + - Quark + + You should say Y if you are running a kernel on one of these SoC's. + +config IOSF_MBI_DEBUG + bool "Enable IOSF sideband access through debugfs" + depends on IOSF_MBI && DEBUG_FS + ---help--- + Select this option to expose the IOSF sideband access registers (MCR, + MDR, MCRX) through debugfs to write and read register information from + different units on the SoC. This is most useful for obtaining device + state information for debug and analysis. As this is a general access + mechanism, users of this option would have specific knowledge of the + device they want to access. + + If you don't require the option or are in doubt, say N. + config X86_RDC321X bool "RDC R-321x SoC" depends on X86_32 @@ -2454,11 +2484,6 @@ config X86_DMA_REMAP bool depends on STA2X11 -config IOSF_MBI - tristate - default m - depends on PCI - config PMC_ATOM def_bool y depends on PCI diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 7c68808edeb7..bb1376381985 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -194,7 +194,7 @@ static bool mem_avoid_overlap(struct mem_vector *img) while (ptr) { struct mem_vector avoid; - avoid.start = (u64)ptr; + avoid.start = (unsigned long)ptr; avoid.size = sizeof(*ptr) + ptr->len; if (mem_overlaps(img, &avoid)) diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index 4579eff0ef4d..637097e66a62 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c @@ -16,6 +16,7 @@ #include <stdio.h> #include "../include/asm/required-features.h" +#include "../include/asm/disabled-features.h" #include "../include/asm/cpufeature.h" #include "../kernel/cpu/capflags.c" diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 4299eb05023c..711de084ab57 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -151,6 +151,16 @@ ENTRY(ia32_sysenter_target) 1: movl (%rbp),%ebp _ASM_EXTABLE(1b,ia32_badarg) ASM_CLAC + + /* + * Sysenter doesn't filter flags, so we need to clear NT + * ourselves. To save a few cycles, we can check whether + * NT was set instead of doing an unconditional popfq. + */ + testl $X86_EFLAGS_NT,EFLAGS(%rsp) /* saved EFLAGS match cpu */ + jnz sysenter_fix_flags +sysenter_flags_fixed: + orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) CFI_REMEMBER_STATE @@ -184,6 +194,8 @@ sysexit_from_sys_call: TRACE_IRQS_ON ENABLE_INTERRUPTS_SYSEXIT32 + CFI_RESTORE_STATE + #ifdef CONFIG_AUDITSYSCALL .macro auditsys_entry_common movl %esi,%r9d /* 6th arg: 4th syscall arg */ @@ -226,7 +238,6 @@ sysexit_from_sys_call: .endm sysenter_auditsys: - CFI_RESTORE_STATE auditsys_entry_common movl %ebp,%r9d /* reload 6th syscall arg */ jmp sysenter_dispatch @@ -235,6 +246,11 @@ sysexit_audit: auditsys_exit sysexit_from_sys_call #endif +sysenter_fix_flags: + pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) + popfq_cfi + jmp sysenter_flags_fixed + sysenter_tracesys: #ifdef CONFIG_AUDITSYSCALL testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index cb4c73bfeb48..76659b67fd11 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -85,7 +85,7 @@ For 32-bit we have the following conventions - kernel is built with #define ARGOFFSET R11 #define SWFRAME ORIG_RAX - .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1 + .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 subq $9*8+\addskip, %rsp CFI_ADJUST_CFA_OFFSET 9*8+\addskip movq_cfi rdi, 8*8 @@ -96,7 +96,11 @@ For 32-bit we have the following conventions - kernel is built with movq_cfi rcx, 5*8 .endif + .if \rax_enosys + movq $-ENOSYS, 4*8(%rsp) + .else movq_cfi rax, 4*8 + .endif .if \save_r891011 movq_cfi r8, 3*8 diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 094292a63e74..0bb1335313b2 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -8,6 +8,10 @@ #include <asm/required-features.h> #endif +#ifndef _ASM_X86_DISABLED_FEATURES_H +#include <asm/disabled-features.h> +#endif + #define NCAPINTS 11 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ @@ -282,6 +286,18 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \ (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) +#define DISABLED_MASK_BIT_SET(bit) \ + ( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0)) || \ + (((bit)>>5)==1 && (1UL<<((bit)&31) & DISABLED_MASK1)) || \ + (((bit)>>5)==2 && (1UL<<((bit)&31) & DISABLED_MASK2)) || \ + (((bit)>>5)==3 && (1UL<<((bit)&31) & DISABLED_MASK3)) || \ + (((bit)>>5)==4 && (1UL<<((bit)&31) & DISABLED_MASK4)) || \ + (((bit)>>5)==5 && (1UL<<((bit)&31) & DISABLED_MASK5)) || \ + (((bit)>>5)==6 && (1UL<<((bit)&31) & DISABLED_MASK6)) || \ + (((bit)>>5)==7 && (1UL<<((bit)&31) & DISABLED_MASK7)) || \ + (((bit)>>5)==8 && (1UL<<((bit)&31) & DISABLED_MASK8)) || \ + (((bit)>>5)==9 && (1UL<<((bit)&31) & DISABLED_MASK9)) ) + #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ test_cpu_cap(c, bit)) @@ -290,6 +306,18 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability)) +/* + * This macro is for detection of features which need kernel + * infrastructure to be used. It may *not* directly test the CPU + * itself. Use the cpu_has() family if you want true runtime + * testing of CPU features, like in hypervisor code where you are + * supporting a possible guest feature where host support for it + * is not relevant. + */ +#define cpu_feature_enabled(bit) \ + (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : \ + cpu_has(&boot_cpu_data, bit)) + #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) @@ -304,11 +332,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) -#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) #define cpu_has_de boot_cpu_has(X86_FEATURE_DE) #define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) #define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) -#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE) #define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP) @@ -324,9 +350,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) #define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) -#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR) -#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR) -#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR) #define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE) #define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN) #define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT) @@ -361,25 +384,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) #define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT) -#ifdef CONFIG_X86_64 - -#undef cpu_has_vme -#define cpu_has_vme 0 - -#undef cpu_has_pae -#define cpu_has_pae ___BUG___ - -#undef cpu_has_k6_mtrr -#define cpu_has_k6_mtrr 0 - -#undef cpu_has_cyrix_arr -#define cpu_has_cyrix_arr 0 - -#undef cpu_has_centaur_mcr -#define cpu_has_centaur_mcr 0 - -#endif /* CONFIG_X86_64 */ - #if __GNUC__ >= 4 extern void warn_pre_alternatives(void); extern bool __static_cpu_has_safe(u16 bit); diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h new file mode 100644 index 000000000000..97534a7d38e3 --- /dev/null +++ b/arch/x86/include/asm/disabled-features.h @@ -0,0 +1,39 @@ +#ifndef _ASM_X86_DISABLED_FEATURES_H +#define _ASM_X86_DISABLED_FEATURES_H + +/* These features, although they might be available in a CPU + * will not be used because the compile options to support + * them are not present. + * + * This code allows them to be checked and disabled at + * compile time without an explicit #ifdef. Use + * cpu_feature_enabled(). + */ + +#ifdef CONFIG_X86_64 +# define DISABLE_VME (1<<(X86_FEATURE_VME & 31)) +# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) +# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) +# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) +#else +# define DISABLE_VME 0 +# define DISABLE_K6_MTRR 0 +# define DISABLE_CYRIX_ARR 0 +# define DISABLE_CENTAUR_MCR 0 +#endif /* CONFIG_X86_64 */ + +/* + * Make sure to add features to the correct mask + */ +#define DISABLED_MASK0 (DISABLE_VME) +#define DISABLED_MASK1 0 +#define DISABLED_MASK2 0 +#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) +#define DISABLED_MASK4 0 +#define DISABLED_MASK5 0 +#define DISABLED_MASK6 0 +#define DISABLED_MASK7 0 +#define DISABLED_MASK8 0 +#define DISABLED_MASK9 0 + +#endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1a055c81d864..ca3347a9dab5 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -160,8 +160,9 @@ do { \ #define elf_check_arch(x) \ ((x)->e_machine == EM_X86_64) -#define compat_elf_check_arch(x) \ - (elf_check_arch_ia32(x) || (x)->e_machine == EM_X86_64) +#define compat_elf_check_arch(x) \ + (elf_check_arch_ia32(x) || \ + (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64)) #if __USER32_DS != __USER_DS # error "The following code assumes __USER32_DS == __USER_DS" diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 412ececa00b9..e97622f57722 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -344,7 +344,7 @@ static inline void __thread_fpu_end(struct task_struct *tsk) static inline void __thread_fpu_begin(struct task_struct *tsk) { - if (!static_cpu_has_safe(X86_FEATURE_EAGER_FPU)) + if (!use_eager_fpu()) clts(); __thread_set_has_fpu(tsk); } diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 9067166409bf..bbe296e0bce1 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h @@ -43,7 +43,7 @@ struct extended_sigtable { #define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ - (((struct microcode_intel *)mc)->hdr.totalsize ? \ + (((struct microcode_intel *)mc)->hdr.datasize ? \ ((struct microcode_intel *)mc)->hdr.totalsize : \ DEFAULT_UCODE_TOTALSIZE) diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 4064acae625d..01b493e5a99b 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -9,7 +9,6 @@ #ifdef CONFIG_NUMA #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) -#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) /* * Too small node sizes may confuse the VM badly. Usually they diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 9ee322103c6d..b6c0b404898a 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -32,9 +32,6 @@ static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); -extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); - - /* * Define this if things work differently on an i386 and an i486: * it will (on an i486) warn about kernel memory accesses that are diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 3874693c0e53..4572b2f30237 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -116,7 +116,8 @@ static inline void native_pgd_clear(pgd_t *pgd) native_set_pgd(pgd, native_make_pgd(0)); } -extern void sync_global_pgds(unsigned long start, unsigned long end); +extern void sync_global_pgds(unsigned long start, unsigned long end, + int removed); /* * Conversion functions: convert a page and protection to a page entry, diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 6205f0c434db..86fc2bb82287 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -75,6 +75,11 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code, int si_code); + +extern unsigned long syscall_trace_enter_phase1(struct pt_regs *, u32 arch); +extern long syscall_trace_enter_phase2(struct pt_regs *, u32 arch, + unsigned long phase1_result); + extern long syscall_trace_enter(struct pt_regs *); extern void syscall_trace_leave(struct pt_regs *); diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index ae915391ebec..4128b5fcb559 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -32,7 +32,7 @@ static int numachip_system __read_mostly; -static const struct apic apic_numachip __read_mostly; +static const struct apic apic_numachip; static unsigned int get_apic_id(unsigned long x) { diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 004f017aa7b9..8e9dcfd630e4 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -204,7 +204,6 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { -#ifdef CONFIG_SMP unsigned long val; int pnode; @@ -223,7 +222,6 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); atomic_set(&init_deasserted, 1); -#endif return 0; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8b699bff0099..3eff36f719fb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1186,7 +1186,7 @@ void syscall_init(void) /* Flags to clear on syscall */ wrmsrl(MSR_SYSCALL_MASK, X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| - X86_EFLAGS_IOPL|X86_EFLAGS_AC); + X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); } /* @@ -1400,7 +1400,7 @@ void cpu_init(void) printk(KERN_INFO "Initializing CPU#%d\n", cpu); - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); load_current_idt(); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 74e804ddc5c7..1ef456273172 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -144,6 +144,21 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_ERMS); } } + + /* + * Intel Quark Core DevMan_001.pdf section 6.4.11 + * "The operating system also is required to invalidate (i.e., flush) + * the TLB when any changes are made to any of the page table entries. + * The operating system must reload CR3 to cause the TLB to be flushed" + * + * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should + * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE + * to be modified + */ + if (c->x86 == 5 && c->x86_model == 9) { + pr_info("Disabling PGE capability bit\n"); + setup_clear_cpu_cap(X86_FEATURE_PGE); + } } #ifdef CONFIG_X86_32 @@ -382,6 +397,13 @@ static void init_intel(struct cpuinfo_x86 *c) } l2 = init_intel_cacheinfo(c); + + /* Detect legacy cache sizes if init_intel_cacheinfo did not */ + if (l2 == 0) { + cpu_detect_cache_sizes(c); + l2 = c->x86_cache_size; + } + if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ @@ -485,6 +507,13 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) */ if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) size = 256; + + /* + * Intel Quark SoC X1000 contains a 4-way set associative + * 16K cache with a 16 byte cache line and 256 lines per tag + */ + if ((c->x86 == 5) && (c->x86_model == 9)) + size = 16; return size; } #endif @@ -686,7 +715,8 @@ static const struct cpu_dev intel_cpu_dev = { [3] = "OverDrive PODP5V83", [4] = "Pentium MMX", [7] = "Mobile Pentium 75 - 200", - [8] = "Mobile Pentium MMX" + [8] = "Mobile Pentium MMX", + [9] = "Quark SoC X1000", } }, { .family = 6, .model_names = diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 36a1bb6d1ee0..1af51b1586d7 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -498,8 +498,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c) if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG - "CPU%d: Thermal monitoring handled by SMI\n", cpu); + if (system_state == SYSTEM_BOOTING) + printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); return; } diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c index 617a9e284245..7aa1acc79789 100644 --- a/arch/x86/kernel/cpu/microcode/amd_early.c +++ b/arch/x86/kernel/cpu/microcode/amd_early.c @@ -27,7 +27,7 @@ static u32 ucode_new_rev; u8 amd_ucode_patch[PATCH_MAX_SIZE]; static u16 this_equiv_id; -struct cpio_data ucode_cpio; +static struct cpio_data ucode_cpio; /* * Microcode patch container file is prepended to the initrd in cpio format. diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index a276fa75d9b5..c6826d1e8082 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -127,7 +127,7 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) return get_matching_microcode(csig, cpf, mc_intel, crev); } -int apply_microcode(int cpu) +static int apply_microcode_intel(int cpu) { struct microcode_intel *mc_intel; struct ucode_cpu_info *uci; @@ -314,7 +314,7 @@ static struct microcode_ops microcode_intel_ops = { .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info, - .apply_microcode = apply_microcode, + .apply_microcode = apply_microcode_intel, .microcode_fini_cpu = microcode_fini_cpu, }; diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c index 18f739129e72..b88343f7a3b3 100644 --- a/arch/x86/kernel/cpu/microcode/intel_early.c +++ b/arch/x86/kernel/cpu/microcode/intel_early.c @@ -28,8 +28,8 @@ #include <asm/tlbflush.h> #include <asm/setup.h> -unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; -struct mc_saved_data { +static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; +static struct mc_saved_data { unsigned int mc_saved_count; struct microcode_intel **mc_saved; } mc_saved_data; @@ -415,7 +415,7 @@ static void __ref show_saved_mc(void) struct ucode_cpu_info uci; if (mc_saved_data.mc_saved_count == 0) { - pr_debug("no micorcode data saved.\n"); + pr_debug("no microcode data saved.\n"); return; } pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); @@ -506,7 +506,7 @@ int save_mc_for_early(u8 *mc) if (mc_saved && mc_saved_count) memcpy(mc_saved_tmp, mc_saved, - mc_saved_count * sizeof(struct mirocode_intel *)); + mc_saved_count * sizeof(struct microcode_intel *)); /* * Save the microcode patch mc in mc_save_tmp structure if it's a newer * version. @@ -526,7 +526,7 @@ int save_mc_for_early(u8 *mc) show_saved_mc(); /* - * Free old saved microcod data. + * Free old saved microcode data. */ if (mc_saved) { for (i = 0; i < mc_saved_count_init; i++) diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index f961de9964c7..ea5f363a1948 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -707,7 +707,7 @@ void __init mtrr_bp_init(void) } else { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - if (cpu_has_k6_mtrr) { + if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) { /* Pre-Athlon (K6) AMD CPU MTRRs */ mtrr_if = mtrr_ops[X86_VENDOR_AMD]; size_or_mask = SIZE_OR_MASK_BITS(32); @@ -715,14 +715,14 @@ void __init mtrr_bp_init(void) } break; case X86_VENDOR_CENTAUR: - if (cpu_has_centaur_mcr) { + if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) { mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; } break; case X86_VENDOR_CYRIX: - if (cpu_has_cyrix_arr) { + if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) { mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 988c00a1f60d..49f886481615 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -682,15 +682,14 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len) * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). * * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. + * overlapping entries. */ void __init e820_mark_nosave_regions(unsigned long limit_pfn) { int i; - unsigned long pfn; + unsigned long pfn = 0; - pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); - for (i = 1; i < e820.nr_map; i++) { + for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; if (pfn < PFN_UP(ei->addr)) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 2fac1343a90b..df088bb03fb3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -404,8 +404,8 @@ GLOBAL(system_call_after_swapgs) * and short: */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8,0 - movq %rax,ORIG_RAX-ARGOFFSET(%rsp) + SAVE_ARGS 8, 0, rax_enosys=1 + movq_cfi rax,(ORIG_RAX-ARGOFFSET) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) @@ -417,7 +417,7 @@ system_call_fastpath: andl $__SYSCALL_MASK,%eax cmpl $__NR_syscall_max,%eax #endif - ja badsys + ja ret_from_sys_call /* and return regs->ax */ movq %r10,%rcx call *sys_call_table(,%rax,8) # XXX: rip relative movq %rax,RAX-ARGOFFSET(%rsp) @@ -476,28 +476,8 @@ sysret_signal: FIXUP_TOP_OF_STACK %r11, -ARGOFFSET jmp int_check_syscall_exit_work -badsys: - movq $-ENOSYS,RAX-ARGOFFSET(%rsp) - jmp ret_from_sys_call - #ifdef CONFIG_AUDITSYSCALL /* - * Fast path for syscall audit without full syscall trace. - * We just call __audit_syscall_entry() directly, and then - * jump back to the normal fast path. - */ -auditsys: - movq %r10,%r9 /* 6th arg: 4th syscall arg */ - movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ - movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ - movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ - movq %rax,%rsi /* 2nd arg: syscall number */ - movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ - call __audit_syscall_entry - LOAD_ARGS 0 /* reload call-clobbered registers */ - jmp system_call_fastpath - - /* * Return fast path for syscall audit. Call __audit_syscall_exit() * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT * masked off. @@ -514,18 +494,25 @@ sysret_audit: /* Do syscall tracing */ tracesys: -#ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) - jz auditsys -#endif + leaq -REST_SKIP(%rsp), %rdi + movq $AUDIT_ARCH_X86_64, %rsi + call syscall_trace_enter_phase1 + test %rax, %rax + jnz tracesys_phase2 /* if needed, run the slow path */ + LOAD_ARGS 0 /* else restore clobbered regs */ + jmp system_call_fastpath /* and return to the fast path */ + +tracesys_phase2: SAVE_REST - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ FIXUP_TOP_OF_STACK %rdi - movq %rsp,%rdi - call syscall_trace_enter + movq %rsp, %rdi + movq $AUDIT_ARCH_X86_64, %rsi + movq %rax,%rdx + call syscall_trace_enter_phase2 + /* * Reload arg registers from stack in case ptrace changed them. - * We don't reload %rax because syscall_trace_enter() returned + * We don't reload %rax because syscall_trace_entry_phase2() returned * the value it wants us to use in the table lookup. */ LOAD_ARGS ARGOFFSET, 1 @@ -536,7 +523,7 @@ tracesys: andl $__SYSCALL_MASK,%eax cmpl $__NR_syscall_max,%eax #endif - ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ + ja int_ret_from_sys_call /* RAX(%rsp) is already set */ movq %r10,%rcx /* fixup for C */ call *sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) diff --git a/arch/x86/kernel/iosf_mbi.c b/arch/x86/kernel/iosf_mbi.c index 9030e83db6ee..82f8d02f0df2 100644 --- a/arch/x86/kernel/iosf_mbi.c +++ b/arch/x86/kernel/iosf_mbi.c @@ -22,10 +22,13 @@ #include <linux/init.h> #include <linux/spinlock.h> #include <linux/pci.h> +#include <linux/debugfs.h> +#include <linux/capability.h> #include <asm/iosf_mbi.h> #define PCI_DEVICE_ID_BAYTRAIL 0x0F00 +#define PCI_DEVICE_ID_BRASWELL 0x2280 #define PCI_DEVICE_ID_QUARK_X1000 0x0958 static DEFINE_SPINLOCK(iosf_mbi_lock); @@ -187,6 +190,89 @@ bool iosf_mbi_available(void) } EXPORT_SYMBOL(iosf_mbi_available); +#ifdef CONFIG_IOSF_MBI_DEBUG +static u32 dbg_mdr; +static u32 dbg_mcr; +static u32 dbg_mcrx; + +static int mcr_get(void *data, u64 *val) +{ + *val = *(u32 *)data; + return 0; +} + +static int mcr_set(void *data, u64 val) +{ + u8 command = ((u32)val & 0xFF000000) >> 24, + port = ((u32)val & 0x00FF0000) >> 16, + offset = ((u32)val & 0x0000FF00) >> 8; + int err; + + *(u32 *)data = val; + + if (!capable(CAP_SYS_RAWIO)) + return -EACCES; + + if (command & 1u) + err = iosf_mbi_write(port, + command, + dbg_mcrx | offset, + dbg_mdr); + else + err = iosf_mbi_read(port, + command, + dbg_mcrx | offset, + &dbg_mdr); + + return err; +} +DEFINE_SIMPLE_ATTRIBUTE(iosf_mcr_fops, mcr_get, mcr_set , "%llx\n"); + +static struct dentry *iosf_dbg; + +static void iosf_sideband_debug_init(void) +{ + struct dentry *d; + + iosf_dbg = debugfs_create_dir("iosf_sb", NULL); + if (IS_ERR_OR_NULL(iosf_dbg)) + return; + + /* mdr */ + d = debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr); + if (IS_ERR_OR_NULL(d)) + goto cleanup; + + /* mcrx */ + debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx); + if (IS_ERR_OR_NULL(d)) + goto cleanup; + + /* mcr - initiates mailbox tranaction */ + debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops); + if (IS_ERR_OR_NULL(d)) + goto cleanup; + + return; + +cleanup: + debugfs_remove_recursive(d); +} + +static void iosf_debugfs_init(void) +{ + iosf_sideband_debug_init(); +} + +static void iosf_debugfs_remove(void) +{ + debugfs_remove_recursive(iosf_dbg); +} +#else +static inline void iosf_debugfs_init(void) { } +static inline void iosf_debugfs_remove(void) { } +#endif /* CONFIG_IOSF_MBI_DEBUG */ + static int iosf_mbi_probe(struct pci_dev *pdev, const struct pci_device_id *unused) { @@ -204,6 +290,7 @@ static int iosf_mbi_probe(struct pci_dev *pdev, static const struct pci_device_id iosf_mbi_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BAYTRAIL) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BRASWELL) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_QUARK_X1000) }, { 0, }, }; @@ -217,11 +304,15 @@ static struct pci_driver iosf_mbi_pci_driver = { static int __init iosf_mbi_init(void) { + iosf_debugfs_init(); + return pci_register_driver(&iosf_mbi_pci_driver); } static void __exit iosf_mbi_exit(void) { + iosf_debugfs_remove(); + pci_unregister_driver(&iosf_mbi_pci_driver); if (mbi_pdev) { pci_dev_put(mbi_pdev); diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 1667b1de8d5d..72e8e310258d 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -247,7 +247,8 @@ void machine_kexec(struct kimage *image) /* now call it */ image->start = relocate_kernel_ptr((unsigned long)image->head, (unsigned long)page_list, - image->start, cpu_has_pae, + image->start, + boot_cpu_has(X86_FEATURE_PAE), image->preserve_context); #ifdef CONFIG_KEXEC_JUMP diff --git a/arch/x86/kernel/pmc_atom.c b/arch/x86/kernel/pmc_atom.c index 0c424a67985d..0ee5025e0fa4 100644 --- a/arch/x86/kernel/pmc_atom.c +++ b/arch/x86/kernel/pmc_atom.c @@ -235,6 +235,11 @@ err: pmc_dbgfs_unregister(pmc); return -ENODEV; } +#else +static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) +{ + return 0; +} #endif /* CONFIG_DEBUG_FS */ static int pmc_setup_dev(struct pci_dev *pdev) @@ -262,14 +267,12 @@ static int pmc_setup_dev(struct pci_dev *pdev) /* PMC hardware registers setup */ pmc_hw_reg_setup(pmc); -#ifdef CONFIG_DEBUG_FS ret = pmc_dbgfs_register(pmc, pdev); if (ret) { iounmap(pmc->regmap); - return ret; } -#endif /* CONFIG_DEBUG_FS */ - return 0; + + return ret; } /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f804dc935d2a..e127ddaa2d5a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -64,14 +64,16 @@ EXPORT_SYMBOL_GPL(task_xstate_cachep); */ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - int ret; - *dst = *src; - if (fpu_allocated(&src->thread.fpu)) { - memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); - ret = fpu_alloc(&dst->thread.fpu); - if (ret) - return ret; + + dst->thread.fpu_counter = 0; + dst->thread.fpu.has_fpu = 0; + dst->thread.fpu.last_cpu = ~0; + dst->thread.fpu.state = NULL; + if (tsk_used_math(src)) { + int err = fpu_alloc(&dst->thread.fpu); + if (err) + return err; fpu_copy(dst, src); } return 0; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 7bc86bbe7485..8f3ebfe710d0 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -138,6 +138,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.sp = (unsigned long) childregs; p->thread.sp0 = (unsigned long) (childregs+1); + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ @@ -152,9 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->orig_ax = -1; childregs->cs = __KERNEL_CS | get_kernel_rpl(); childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; - p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); return 0; } *childregs = *current_pt_regs(); @@ -165,13 +164,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.ip = (unsigned long) ret_from_fork; task_user_gs(p) = get_user_gs(current_pt_regs()); - p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ca5b02d405c3..3ed4a68d4013 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -163,7 +163,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.sp = (unsigned long) childregs; p->thread.usersp = me->thread.usersp; set_tsk_thread_flag(p, TIF_FORK); - p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); @@ -193,8 +192,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->sp = sp; err = -ENOMEM; - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 678c0ada3b3c..29576c244699 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1441,24 +1441,126 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, force_sig_info(SIGTRAP, &info, tsk); } - -#ifdef CONFIG_X86_32 -# define IS_IA32 1 -#elif defined CONFIG_IA32_EMULATION -# define IS_IA32 is_compat_task() -#else -# define IS_IA32 0 +static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) +{ +#ifdef CONFIG_X86_64 + if (arch == AUDIT_ARCH_X86_64) { + audit_syscall_entry(arch, regs->orig_ax, regs->di, + regs->si, regs->dx, regs->r10); + } else #endif + { + audit_syscall_entry(arch, regs->orig_ax, regs->bx, + regs->cx, regs->dx, regs->si); + } +} /* - * We must return the syscall number to actually look up in the table. - * This can be -1L to skip running any syscall at all. + * We can return 0 to resume the syscall or anything else to go to phase + * 2. If we resume the syscall, we need to put something appropriate in + * regs->orig_ax. + * + * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax + * are fully functional. + * + * For phase 2's benefit, our return value is: + * 0: resume the syscall + * 1: go to phase 2; no seccomp phase 2 needed + * anything else: go to phase 2; pass return value to seccomp */ -long syscall_trace_enter(struct pt_regs *regs) +unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) +{ + unsigned long ret = 0; + u32 work; + + BUG_ON(regs != task_pt_regs(current)); + + work = ACCESS_ONCE(current_thread_info()->flags) & + _TIF_WORK_SYSCALL_ENTRY; + + /* + * If TIF_NOHZ is set, we are required to call user_exit() before + * doing anything that could touch RCU. + */ + if (work & _TIF_NOHZ) { + user_exit(); + work &= ~TIF_NOHZ; + } + +#ifdef CONFIG_SECCOMP + /* + * Do seccomp first -- it should minimize exposure of other + * code, and keeping seccomp fast is probably more valuable + * than the rest of this. + */ + if (work & _TIF_SECCOMP) { + struct seccomp_data sd; + + sd.arch = arch; + sd.nr = regs->orig_ax; + sd.instruction_pointer = regs->ip; +#ifdef CONFIG_X86_64 + if (arch == AUDIT_ARCH_X86_64) { + sd.args[0] = regs->di; + sd.args[1] = regs->si; + sd.args[2] = regs->dx; + sd.args[3] = regs->r10; + sd.args[4] = regs->r8; + sd.args[5] = regs->r9; + } else +#endif + { + sd.args[0] = regs->bx; + sd.args[1] = regs->cx; + sd.args[2] = regs->dx; + sd.args[3] = regs->si; + sd.args[4] = regs->di; + sd.args[5] = regs->bp; + } + + BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0); + BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1); + + ret = seccomp_phase1(&sd); + if (ret == SECCOMP_PHASE1_SKIP) { + regs->orig_ax = -1; + ret = 0; + } else if (ret != SECCOMP_PHASE1_OK) { + return ret; /* Go directly to phase 2 */ + } + + work &= ~_TIF_SECCOMP; + } +#endif + + /* Do our best to finish without phase 2. */ + if (work == 0) + return ret; /* seccomp and/or nohz only (ret == 0 here) */ + +#ifdef CONFIG_AUDITSYSCALL + if (work == _TIF_SYSCALL_AUDIT) { + /* + * If there is no more work to be done except auditing, + * then audit in phase 1. Phase 2 always audits, so, if + * we audit here, then we can't go on to phase 2. + */ + do_audit_syscall_entry(regs, arch); + return 0; + } +#endif + + return 1; /* Something is enabled that we can't handle in phase 1 */ +} + +/* Returns the syscall nr to run (which should match regs->orig_ax). */ +long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, + unsigned long phase1_result) { long ret = 0; + u32 work = ACCESS_ONCE(current_thread_info()->flags) & + _TIF_WORK_SYSCALL_ENTRY; - user_exit(); + BUG_ON(regs != task_pt_regs(current)); /* * If we stepped into a sysenter/syscall insn, it trapped in @@ -1467,17 +1569,21 @@ long syscall_trace_enter(struct pt_regs *regs) * do_debug() and we need to set it again to restore the user * state. If we entered on the slow path, TF was already set. */ - if (test_thread_flag(TIF_SINGLESTEP)) + if (work & _TIF_SINGLESTEP) regs->flags |= X86_EFLAGS_TF; - /* do the secure computing check first */ - if (secure_computing(regs->orig_ax)) { +#ifdef CONFIG_SECCOMP + /* + * Call seccomp_phase2 before running the other hooks so that + * they can see any changes made by a seccomp tracer. + */ + if (phase1_result > 1 && seccomp_phase2(phase1_result)) { /* seccomp failures shouldn't expose any additional code. */ - ret = -1L; - goto out; + return -1; } +#endif - if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) + if (unlikely(work & _TIF_SYSCALL_EMU)) ret = -1L; if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && @@ -1487,23 +1593,22 @@ long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->orig_ax); - if (IS_IA32) - audit_syscall_entry(AUDIT_ARCH_I386, - regs->orig_ax, - regs->bx, regs->cx, - regs->dx, regs->si); -#ifdef CONFIG_X86_64 - else - audit_syscall_entry(AUDIT_ARCH_X86_64, - regs->orig_ax, - regs->di, regs->si, - regs->dx, regs->r10); -#endif + do_audit_syscall_entry(regs, arch); -out: return ret ?: regs->orig_ax; } +long syscall_trace_enter(struct pt_regs *regs) +{ + u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); + + if (phase1_result == 0) + return regs->orig_ax; + else + return syscall_trace_enter_phase2(regs, arch, phase1_result); +} + void syscall_trace_leave(struct pt_regs *regs) { bool step; diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index ff898bbf579d..176a0f99d4da 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -498,6 +498,24 @@ void force_hpet_resume(void) } /* + * According to the datasheet e6xx systems have the HPET hardwired to + * 0xfed00000 + */ +static void e6xx_force_enable_hpet(struct pci_dev *dev) +{ + if (hpet_address || force_hpet_address) + return; + + force_hpet_address = 0xFED00000; + force_hpet_resume_type = NONE_FORCE_HPET_RESUME; + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " + "0x%lx\n", force_hpet_address); + return; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU, + e6xx_force_enable_hpet); + +/* * HPET MSI on some boards (ATI SB700/SB800) has side effect on * floppy DMA. Disable HPET MSI on such platforms. * See erratum #27 (Misinterpreted MSI Requests May Result in diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 41ead8d3bc0b..235cfd39e0d7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -879,6 +879,15 @@ void __init setup_arch(char **cmdline_p) KERNEL_PGD_PTRS); load_cr3(swapper_pg_dir); + /* + * Note: Quark X1000 CPUs advertise PGE incorrectly and require + * a cr3 based tlb flush, so the following __flush_tlb_all() + * will not flush anything because the cpu quirk which clears + * X86_FEATURE_PGE has not been invoked yet. Though due to the + * load_cr3() above the TLB has been flushed already. The + * quirk is invoked before subsequent calls to __flush_tlb_all() + * so proper operation is guaranteed. + */ __flush_tlb_all(); #else printk(KERN_INFO "Command line: %s\n", boot_command_line); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 2851d63c1202..ed37a768d0fc 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -675,6 +675,11 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) * handler too. */ regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF); + /* + * Ensure the signal handler starts with the new fpu state. + */ + if (used_math()) + drop_init_fpu(current); } signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); } diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index e1e1e80fc6a6..957779f4eb40 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -216,7 +216,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) */ regs->orig_ax = syscall_nr; regs->ax = -ENOSYS; - tmp = secure_computing(syscall_nr); + tmp = secure_computing(); if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 940b142cc11f..4c540c4719d8 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -271,8 +271,6 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) return -1; - drop_init_fpu(tsk); /* trigger finit */ - return 0; } @@ -402,8 +400,11 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) set_used_math(); } - if (use_eager_fpu()) + if (use_eager_fpu()) { + preempt_disable(); math_state_restore(); + preempt_enable(); + } return err; } else { diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9c5b32e2bdc0..d973e61e450d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -349,7 +349,7 @@ out: void vmalloc_sync_all(void) { - sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); + sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0); } /* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5d984769cbd8..4cb8763868fc 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -178,7 +178,7 @@ __setup("noexec32=", nonx32_setup); * When memory was added/removed make sure all the processes MM have * suitable PGD entries in the local PGD level page. */ -void sync_global_pgds(unsigned long start, unsigned long end) +void sync_global_pgds(unsigned long start, unsigned long end, int removed) { unsigned long address; @@ -186,7 +186,12 @@ void sync_global_pgds(unsigned long start, unsigned long end) const pgd_t *pgd_ref = pgd_offset_k(address); struct page *page; - if (pgd_none(*pgd_ref)) + /* + * When it is called after memory hot remove, pgd_none() + * returns true. In this case (removed == 1), we must clear + * the PGD entries in the local PGD level page. + */ + if (pgd_none(*pgd_ref) && !removed) continue; spin_lock(&pgd_lock); @@ -199,12 +204,18 @@ void sync_global_pgds(unsigned long start, unsigned long end) pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); - if (pgd_none(*pgd)) - set_pgd(pgd, *pgd_ref); - else + if (!pgd_none(*pgd_ref) && !pgd_none(*pgd)) BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + if (removed) { + if (pgd_none(*pgd_ref) && !pgd_none(*pgd)) + pgd_clear(pgd); + } else { + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + } + spin_unlock(pgt_lock); } spin_unlock(&pgd_lock); @@ -633,7 +644,7 @@ kernel_physical_mapping_init(unsigned long start, } if (pgd_changed) - sync_global_pgds(addr, end - 1); + sync_global_pgds(addr, end - 1, 0); __flush_tlb_all(); @@ -976,25 +987,26 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end, bool direct) { unsigned long next; + unsigned long addr; pgd_t *pgd; pud_t *pud; bool pgd_changed = false; - for (; start < end; start = next) { - next = pgd_addr_end(start, end); + for (addr = start; addr < end; addr = next) { + next = pgd_addr_end(addr, end); - pgd = pgd_offset_k(start); + pgd = pgd_offset_k(addr); if (!pgd_present(*pgd)) continue; pud = (pud_t *)pgd_page_vaddr(*pgd); - remove_pud_table(pud, start, next, direct); + remove_pud_table(pud, addr, next, direct); if (free_pud_table(pud, pgd)) pgd_changed = true; } if (pgd_changed) - sync_global_pgds(start, end - 1); + sync_global_pgds(start, end - 1, 1); flush_tlb_all(); } @@ -1341,7 +1353,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) else err = vmemmap_populate_basepages(start, end, node); if (!err) - sync_global_pgds(start, end - 1); + sync_global_pgds(start, end - 1, 0); return err; } diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 4e1e5709fe17..1a883705a12a 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -185,8 +185,8 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) return numa_add_memblk_to(nid, start, end, &numa_meminfo); } -/* Initialize NODE_DATA for a node on the local memory */ -static void __init setup_node_data(int nid, u64 start, u64 end) +/* Allocate NODE_DATA for a node on the local memory */ +static void __init alloc_node_data(int nid) { const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); u64 nd_pa; @@ -194,18 +194,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end) int tnid; /* - * Don't confuse VM with a node that doesn't have the - * minimum amount of memory: - */ - if (end && (end - start) < NODE_MIN_SIZE) - return; - - start = roundup(start, ZONE_ALIGN); - - printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", - nid, start, end - 1); - - /* * Allocate node data. Try node-local memory and then any node. * Never allocate in DMA zone. */ @@ -222,7 +210,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end) nd = __va(nd_pa); /* report and initialize */ - printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n", + printk(KERN_INFO "NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid, nd_pa, nd_pa + nd_size - 1); tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); if (tnid != nid) @@ -230,9 +218,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end) node_data[nid] = nd; memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); - NODE_DATA(nid)->node_id = nid; - NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT; - NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT; node_set_online(nid); } @@ -568,8 +553,17 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) end = max(mi->blk[i].end, end); } - if (start < end) - setup_node_data(nid, start, end); + if (start >= end) + continue; + + /* + * Don't confuse VM with a node that doesn't have the + * minimum amount of memory: + */ + if (end && (end - start) < NODE_MIN_SIZE) + continue; + + alloc_node_data(nid); } /* Dump memblock with node info and return. */ diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 4dd8cf652579..75cc0978d45d 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -59,41 +59,6 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) __flush_tlb_one(vaddr); } -/* - * Associate a large virtual page frame with a given physical page frame - * and protection flags for that frame. pfn is for the base of the page, - * vaddr is what the page gets mapped to - both must be properly aligned. - * The pmd must already be instantiated. Assumes PAE mode. - */ -void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - - if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ - printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n"); - return; /* BUG(); */ - } - if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */ - printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n"); - return; /* BUG(); */ - } - pgd = swapper_pg_dir + pgd_index(vaddr); - if (pgd_none(*pgd)) { - printk(KERN_WARNING "set_pmd_pfn: pgd_none\n"); - return; /* BUG(); */ - } - pud = pud_offset(pgd, vaddr); - pmd = pmd_offset(pud, vaddr); - set_pmd(pmd, pfn_pmd(pfn, flags)); - /* - * It's enough to flush this one mapping. - * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); -} - unsigned long __FIXADDR_TOP = 0xfffff000; EXPORT_SYMBOL(__FIXADDR_TOP); diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S index 8d0c420465cc..fa4b8b9841ff 100644 --- a/arch/x86/um/checksum_32.S +++ b/arch/x86/um/checksum_32.S @@ -214,242 +214,3 @@ csum_partial: ret #endif - -/* -unsigned int csum_partial_copy_generic (const char *src, char *dst, - int len, int sum, int *src_err_ptr, int *dst_err_ptr) - */ - -/* - * Copy from ds while checksumming, otherwise like csum_partial - * - * The macros SRC and DST specify the type of access for the instruction. - * thus we can call a custom exception handler for all access types. - * - * FIXME: could someone double-check whether I haven't mixed up some SRC and - * DST definitions? It's damn hard to trigger all cases. I hope I got - * them all but there's no guarantee. - */ - -#define SRC(y...) \ - 9999: y; \ - _ASM_EXTABLE(9999b, 6001f) - -#define DST(y...) \ - 9999: y; \ - _ASM_EXTABLE(9999b, 6002f) - -.align 4 - -#ifndef CONFIG_X86_USE_PPRO_CHECKSUM - -#define ARGBASE 16 -#define FP 12 - -csum_partial_copy_generic_i386: - subl $4,%esp - pushl %edi - pushl %esi - pushl %ebx - movl ARGBASE+16(%esp),%eax # sum - movl ARGBASE+12(%esp),%ecx # len - movl ARGBASE+4(%esp),%esi # src - movl ARGBASE+8(%esp),%edi # dst - - testl $2, %edi # Check alignment. - jz 2f # Jump if alignment is ok. - subl $2, %ecx # Alignment uses up two bytes. - jae 1f # Jump if we had at least two bytes. - addl $2, %ecx # ecx was < 2. Deal with it. - jmp 4f -SRC(1: movw (%esi), %bx ) - addl $2, %esi -DST( movw %bx, (%edi) ) - addl $2, %edi - addw %bx, %ax - adcl $0, %eax -2: - movl %ecx, FP(%esp) - shrl $5, %ecx - jz 2f - testl %esi, %esi -SRC(1: movl (%esi), %ebx ) -SRC( movl 4(%esi), %edx ) - adcl %ebx, %eax -DST( movl %ebx, (%edi) ) - adcl %edx, %eax -DST( movl %edx, 4(%edi) ) - -SRC( movl 8(%esi), %ebx ) -SRC( movl 12(%esi), %edx ) - adcl %ebx, %eax -DST( movl %ebx, 8(%edi) ) - adcl %edx, %eax -DST( movl %edx, 12(%edi) ) - -SRC( movl 16(%esi), %ebx ) -SRC( movl 20(%esi), %edx ) - adcl %ebx, %eax -DST( movl %ebx, 16(%edi) ) - adcl %edx, %eax -DST( movl %edx, 20(%edi) ) - -SRC( movl 24(%esi), %ebx ) -SRC( movl 28(%esi), %edx ) - adcl %ebx, %eax -DST( movl %ebx, 24(%edi) ) - adcl %edx, %eax -DST( movl %edx, 28(%edi) ) - - lea 32(%esi), %esi - lea 32(%edi), %edi - dec %ecx - jne 1b - adcl $0, %eax -2: movl FP(%esp), %edx - movl %edx, %ecx - andl $0x1c, %edx - je 4f - shrl $2, %edx # This clears CF -SRC(3: movl (%esi), %ebx ) - adcl %ebx, %eax -DST( movl %ebx, (%edi) ) - lea 4(%esi), %esi - lea 4(%edi), %edi - dec %edx - jne 3b - adcl $0, %eax -4: andl $3, %ecx - jz 7f - cmpl $2, %ecx - jb 5f -SRC( movw (%esi), %cx ) - leal 2(%esi), %esi -DST( movw %cx, (%edi) ) - leal 2(%edi), %edi - je 6f - shll $16,%ecx -SRC(5: movb (%esi), %cl ) -DST( movb %cl, (%edi) ) -6: addl %ecx, %eax - adcl $0, %eax -7: -5000: - -# Exception handler: -.section .fixup, "ax" - -6001: - movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) - - # zero the complete destination - computing the rest - # is too much work - movl ARGBASE+8(%esp), %edi # dst - movl ARGBASE+12(%esp), %ecx # len - xorl %eax,%eax - rep ; stosb - - jmp 5000b - -6002: - movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT,(%ebx) - jmp 5000b - -.previous - - popl %ebx - popl %esi - popl %edi - popl %ecx # equivalent to addl $4,%esp - ret - -#else - -/* Version for PentiumII/PPro */ - -#define ROUND1(x) \ - SRC(movl x(%esi), %ebx ) ; \ - addl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; - -#define ROUND(x) \ - SRC(movl x(%esi), %ebx ) ; \ - adcl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; - -#define ARGBASE 12 - -csum_partial_copy_generic_i386: - pushl %ebx - pushl %edi - pushl %esi - movl ARGBASE+4(%esp),%esi #src - movl ARGBASE+8(%esp),%edi #dst - movl ARGBASE+12(%esp),%ecx #len - movl ARGBASE+16(%esp),%eax #sum -# movl %ecx, %edx - movl %ecx, %ebx - movl %esi, %edx - shrl $6, %ecx - andl $0x3c, %ebx - negl %ebx - subl %ebx, %esi - subl %ebx, %edi - lea -1(%esi),%edx - andl $-32,%edx - lea 3f(%ebx,%ebx), %ebx - testl %esi, %esi - jmp *%ebx -1: addl $64,%esi - addl $64,%edi - SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) - ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) - ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) - ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) - ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) -3: adcl $0,%eax - addl $64, %edx - dec %ecx - jge 1b -4: movl ARGBASE+12(%esp),%edx #len - andl $3, %edx - jz 7f - cmpl $2, %edx - jb 5f -SRC( movw (%esi), %dx ) - leal 2(%esi), %esi -DST( movw %dx, (%edi) ) - leal 2(%edi), %edi - je 6f - shll $16,%edx -5: -SRC( movb (%esi), %dl ) -DST( movb %dl, (%edi) ) -6: addl %edx, %eax - adcl $0, %eax -7: -.section .fixup, "ax" -6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) - # zero the complete destination (computing the rest is too much work) - movl ARGBASE+8(%esp),%edi # dst - movl ARGBASE+12(%esp),%ecx # len - xorl %eax,%eax - rep; stosb - jmp 7b -6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT, (%ebx) - jmp 7b -.previous - - popl %esi - popl %edi - popl %ebx - ret - -#undef ROUND -#undef ROUND1 - -#endif diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h index fd57829b30d8..0224987556ce 100644 --- a/arch/x86/vdso/vdso2c.h +++ b/arch/x86/vdso/vdso2c.h @@ -109,16 +109,18 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, /* Validate mapping addresses. */ for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) { - if (!syms[i]) + INT_BITS symval = syms[special_pages[i]]; + + if (!symval) continue; /* The mapping isn't used; ignore it. */ - if (syms[i] % 4096) + if (symval % 4096) fail("%s must be a multiple of 4096\n", required_syms[i].name); - if (syms[sym_vvar_start] > syms[i] + 4096) - fail("%s underruns begin_vvar\n", + if (symval + 4096 < syms[sym_vvar_start]) + fail("%s underruns vvar_start\n", required_syms[i].name); - if (syms[i] + 4096 > 0) + if (symval + 4096 > 0) fail("%s is on the wrong side of the vdso text\n", required_syms[i].name); } diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 5df05f26b7d9..329db997ee66 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1660,6 +1660,14 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, device->discipline->check_for_device_change(device, cqr, irb); dasd_put_device(device); } + + /* check for for attention message */ + if (scsw_dstat(&irb->scsw) & DEV_STAT_ATTENTION) { + device = dasd_device_from_cdev_locked(cdev); + device->discipline->check_attention(device, irb->esw.esw1.lpum); + dasd_put_device(device); + } + if (!cqr) return; @@ -2261,8 +2269,8 @@ static inline int _wait_for_wakeup_queue(struct list_head *ccw_queue) static int _dasd_sleep_on_queue(struct list_head *ccw_queue, int interruptible) { struct dasd_device *device; - int rc; struct dasd_ccw_req *cqr, *n; + int rc; retry: list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) { @@ -2310,21 +2318,26 @@ retry: /* * for alias devices simplify error recovery and * return to upper layer + * do not skip ERP requests */ - if (cqr->startdev != cqr->basedev && + if (cqr->startdev != cqr->basedev && !cqr->refers && (cqr->status == DASD_CQR_TERMINATED || cqr->status == DASD_CQR_NEED_ERP)) return -EAGAIN; - else { - /* normal recovery for basedev IO */ - if (__dasd_sleep_on_erp(cqr)) { - if (!cqr->status == DASD_CQR_TERMINATED && - !cqr->status == DASD_CQR_NEED_ERP) - break; - rc = 1; - } + + /* normal recovery for basedev IO */ + if (__dasd_sleep_on_erp(cqr)) { + goto retry; + /* remember that ERP was needed */ + rc = 1; + /* skip processing for active cqr */ + if (cqr->status != DASD_CQR_TERMINATED && + cqr->status != DASD_CQR_NEED_ERP) + break; } } + + /* start ERP requests in upper loop */ if (rc) goto retry; diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 14ba80bfa571..8286f742436b 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1432,6 +1432,29 @@ static ssize_t dasd_reservation_state_store(struct device *dev, static DEVICE_ATTR(last_known_reservation_state, 0644, dasd_reservation_state_show, dasd_reservation_state_store); +static ssize_t dasd_pm_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dasd_device *device; + u8 opm, nppm, cablepm, cuirpm, hpfpm; + + device = dasd_device_from_cdev(to_ccwdev(dev)); + if (IS_ERR(device)) + return sprintf(buf, "0\n"); + + opm = device->path_data.opm; + nppm = device->path_data.npm; + cablepm = device->path_data.cablepm; + cuirpm = device->path_data.cuirpm; + hpfpm = device->path_data.hpfpm; + dasd_put_device(device); + + return sprintf(buf, "%02x %02x %02x %02x %02x\n", opm, nppm, + cablepm, cuirpm, hpfpm); +} + +static DEVICE_ATTR(path_masks, 0444, dasd_pm_show, NULL); + static struct attribute * dasd_attrs[] = { &dev_attr_readonly.attr, &dev_attr_discipline.attr, @@ -1450,6 +1473,7 @@ static struct attribute * dasd_attrs[] = { &dev_attr_reservation_policy.attr, &dev_attr_last_known_reservation_state.attr, &dev_attr_safe_offline.attr, + &dev_attr_path_masks.attr, NULL, }; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 51dea7baf02c..d47f5b99623a 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -29,6 +29,8 @@ #include <asm/cio.h> #include <asm/ccwdev.h> #include <asm/itcw.h> +#include <asm/schid.h> +#include <asm/chpid.h> #include "dasd_int.h" #include "dasd_eckd.h" @@ -112,6 +114,12 @@ struct path_verification_work_data { static struct path_verification_work_data *path_verification_worker; static DEFINE_MUTEX(dasd_path_verification_mutex); +struct check_attention_work_data { + struct work_struct worker; + struct dasd_device *device; + __u8 lpum; +}; + /* initial attempt at a probe function. this can be simplified once * the other detection code is gone */ static int @@ -1126,6 +1134,7 @@ static int dasd_eckd_read_conf(struct dasd_device *device) "device %s instead of %s\n", lpm, print_path_uid, print_device_uid); path_err = -EINVAL; + path_data->cablepm |= lpm; continue; } @@ -1141,6 +1150,13 @@ static int dasd_eckd_read_conf(struct dasd_device *device) break; } path_data->opm |= lpm; + /* + * if the path is used + * it should not be in one of the negative lists + */ + path_data->cablepm &= ~lpm; + path_data->hpfpm &= ~lpm; + path_data->cuirpm &= ~lpm; if (conf_data != private->conf_data) kfree(conf_data); @@ -1230,7 +1246,7 @@ static void do_path_verification_work(struct work_struct *work) struct dasd_eckd_private path_private; struct dasd_uid *uid; __u8 path_rcd_buf[DASD_ECKD_RCD_DATA_SIZE]; - __u8 lpm, opm, npm, ppm, epm; + __u8 lpm, opm, npm, ppm, epm, hpfpm, cablepm; unsigned long flags; char print_uid[60]; int rc; @@ -1248,6 +1264,9 @@ static void do_path_verification_work(struct work_struct *work) npm = 0; ppm = 0; epm = 0; + hpfpm = 0; + cablepm = 0; + for (lpm = 0x80; lpm; lpm >>= 1) { if (!(lpm & data->tbvpm)) continue; @@ -1289,6 +1308,7 @@ static void do_path_verification_work(struct work_struct *work) opm &= ~lpm; npm &= ~lpm; ppm &= ~lpm; + hpfpm |= lpm; continue; } @@ -1350,6 +1370,7 @@ static void do_path_verification_work(struct work_struct *work) opm &= ~lpm; npm &= ~lpm; ppm &= ~lpm; + cablepm |= lpm; continue; } } @@ -1364,12 +1385,21 @@ static void do_path_verification_work(struct work_struct *work) spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); if (!device->path_data.opm && opm) { device->path_data.opm = opm; + device->path_data.cablepm &= ~opm; + device->path_data.cuirpm &= ~opm; + device->path_data.hpfpm &= ~opm; dasd_generic_path_operational(device); - } else + } else { device->path_data.opm |= opm; + device->path_data.cablepm &= ~opm; + device->path_data.cuirpm &= ~opm; + device->path_data.hpfpm &= ~opm; + } device->path_data.npm |= npm; device->path_data.ppm |= ppm; device->path_data.tbvpm |= epm; + device->path_data.cablepm |= cablepm; + device->path_data.hpfpm |= hpfpm; spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); } @@ -4475,6 +4505,343 @@ out_err: return -1; } +static int dasd_eckd_read_message_buffer(struct dasd_device *device, + struct dasd_rssd_messages *messages, + __u8 lpum) +{ + struct dasd_rssd_messages *message_buf; + struct dasd_psf_prssd_data *prssdp; + struct dasd_eckd_private *private; + struct dasd_ccw_req *cqr; + struct ccw1 *ccw; + int rc; + + private = (struct dasd_eckd_private *) device->private; + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, + (sizeof(struct dasd_psf_prssd_data) + + sizeof(struct dasd_rssd_messages)), + device); + if (IS_ERR(cqr)) { + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", + "Could not allocate read message buffer request"); + return PTR_ERR(cqr); + } + + cqr->startdev = device; + cqr->memdev = device; + cqr->block = NULL; + cqr->retries = 256; + cqr->expires = 10 * HZ; + + /* we need to check for messages on exactly this path */ + set_bit(DASD_CQR_VERIFY_PATH, &cqr->flags); + cqr->lpm = lpum; + + /* Prepare for Read Subsystem Data */ + prssdp = (struct dasd_psf_prssd_data *) cqr->data; + memset(prssdp, 0, sizeof(struct dasd_psf_prssd_data)); + prssdp->order = PSF_ORDER_PRSSD; + prssdp->suborder = 0x03; /* Message Buffer */ + /* all other bytes of prssdp must be zero */ + + ccw = cqr->cpaddr; + ccw->cmd_code = DASD_ECKD_CCW_PSF; + ccw->count = sizeof(struct dasd_psf_prssd_data); + ccw->flags |= CCW_FLAG_CC; + ccw->flags |= CCW_FLAG_SLI; + ccw->cda = (__u32)(addr_t) prssdp; + + /* Read Subsystem Data - message buffer */ + message_buf = (struct dasd_rssd_messages *) (prssdp + 1); + memset(message_buf, 0, sizeof(struct dasd_rssd_messages)); + + ccw++; + ccw->cmd_code = DASD_ECKD_CCW_RSSD; + ccw->count = sizeof(struct dasd_rssd_messages); + ccw->flags |= CCW_FLAG_SLI; + ccw->cda = (__u32)(addr_t) message_buf; + + cqr->buildclk = get_tod_clock(); + cqr->status = DASD_CQR_FILLED; + rc = dasd_sleep_on_immediatly(cqr); + if (rc == 0) { + prssdp = (struct dasd_psf_prssd_data *) cqr->data; + message_buf = (struct dasd_rssd_messages *) + (prssdp + 1); + memcpy(messages, message_buf, + sizeof(struct dasd_rssd_messages)); + } else + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, + "Reading messages failed with rc=%d\n" + , rc); + dasd_sfree_request(cqr, cqr->memdev); + return rc; +} + +/* + * Perform Subsystem Function - CUIR response + */ +static int +dasd_eckd_psf_cuir_response(struct dasd_device *device, int response, + __u32 message_id, + struct channel_path_desc *desc, + struct subchannel_id sch_id) +{ + struct dasd_psf_cuir_response *psf_cuir; + struct dasd_ccw_req *cqr; + struct ccw1 *ccw; + int rc; + + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ , + sizeof(struct dasd_psf_cuir_response), + device); + + if (IS_ERR(cqr)) { + DBF_DEV_EVENT(DBF_WARNING, device, "%s", + "Could not allocate PSF-CUIR request"); + return PTR_ERR(cqr); + } + + psf_cuir = (struct dasd_psf_cuir_response *)cqr->data; + psf_cuir->order = PSF_ORDER_CUIR_RESPONSE; + psf_cuir->cc = response; + if (desc) + psf_cuir->chpid = desc->chpid; + psf_cuir->message_id = message_id; + psf_cuir->cssid = sch_id.cssid; + psf_cuir->ssid = sch_id.ssid; + + ccw = cqr->cpaddr; + ccw->cmd_code = DASD_ECKD_CCW_PSF; + ccw->cda = (__u32)(addr_t)psf_cuir; + ccw->count = sizeof(struct dasd_psf_cuir_response); + + cqr->startdev = device; + cqr->memdev = device; + cqr->block = NULL; + cqr->retries = 256; + cqr->expires = 10*HZ; + cqr->buildclk = get_tod_clock(); + cqr->status = DASD_CQR_FILLED; + + rc = dasd_sleep_on(cqr); + + dasd_sfree_request(cqr, cqr->memdev); + return rc; +} + +static int dasd_eckd_cuir_change_state(struct dasd_device *device, __u8 lpum) +{ + unsigned long flags; + __u8 tbcpm; + + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + tbcpm = device->path_data.opm & ~lpum; + if (tbcpm) { + device->path_data.opm = tbcpm; + device->path_data.cuirpm |= lpum; + } + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); + return tbcpm ? 0 : PSF_CUIR_LAST_PATH; +} + +/* + * walk through all devices and quiesce them + * if it is the last path return error + * + * if only part of the devices are quiesced and an error + * occurs no onlining necessary, the storage server will + * notify the already set offline devices again + */ +static int dasd_eckd_cuir_quiesce(struct dasd_device *device, __u8 lpum, + struct channel_path_desc *desc, + struct subchannel_id sch_id) +{ + struct alias_pav_group *pavgroup, *tempgroup; + struct dasd_eckd_private *private; + struct dasd_device *dev, *n; + int rc; + + private = (struct dasd_eckd_private *) device->private; + rc = 0; + + /* active devices */ + list_for_each_entry_safe(dev, n, + &private->lcu->active_devices, + alias_list) { + rc = dasd_eckd_cuir_change_state(dev, lpum); + if (rc) + goto out; + } + + /* inactive devices */ + list_for_each_entry_safe(dev, n, + &private->lcu->inactive_devices, + alias_list) { + rc = dasd_eckd_cuir_change_state(dev, lpum); + if (rc) + goto out; + } + + /* devices in PAV groups */ + list_for_each_entry_safe(pavgroup, tempgroup, + &private->lcu->grouplist, group) { + list_for_each_entry_safe(dev, n, &pavgroup->baselist, + alias_list) { + rc = dasd_eckd_cuir_change_state(dev, lpum); + if (rc) + goto out; + } + list_for_each_entry_safe(dev, n, &pavgroup->aliaslist, + alias_list) { + rc = dasd_eckd_cuir_change_state(dev, lpum); + if (rc) + goto out; + } + } + + pr_warn("Service on the storage server caused path %x.%02x to go offline", + sch_id.cssid, desc ? desc->chpid : 0); + rc = PSF_CUIR_COMPLETED; +out: + return rc; +} + +static int dasd_eckd_cuir_resume(struct dasd_device *device, __u8 lpum, + struct channel_path_desc *desc, + struct subchannel_id sch_id) +{ + struct alias_pav_group *pavgroup, *tempgroup; + struct dasd_eckd_private *private; + struct dasd_device *dev, *n; + + pr_info("Path %x.%02x is back online after service on the storage server", + sch_id.cssid, desc ? desc->chpid : 0); + private = (struct dasd_eckd_private *) device->private; + + /* + * the path may have been added through a generic path event before + * only trigger path verification if the path is not already in use + */ + + list_for_each_entry_safe(dev, n, + &private->lcu->active_devices, + alias_list) { + if (!(dev->path_data.opm & lpum)) { + dev->path_data.tbvpm |= lpum; + dasd_schedule_device_bh(dev); + } + } + + list_for_each_entry_safe(dev, n, + &private->lcu->inactive_devices, + alias_list) { + if (!(dev->path_data.opm & lpum)) { + dev->path_data.tbvpm |= lpum; + dasd_schedule_device_bh(dev); + } + } + + /* devices in PAV groups */ + list_for_each_entry_safe(pavgroup, tempgroup, + &private->lcu->grouplist, + group) { + list_for_each_entry_safe(dev, n, + &pavgroup->baselist, + alias_list) { + if (!(dev->path_data.opm & lpum)) { + dev->path_data.tbvpm |= lpum; + dasd_schedule_device_bh(dev); + } + } + list_for_each_entry_safe(dev, n, + &pavgroup->aliaslist, + alias_list) { + if (!(dev->path_data.opm & lpum)) { + dev->path_data.tbvpm |= lpum; + dasd_schedule_device_bh(dev); + } + } + } + return PSF_CUIR_COMPLETED; +} + +static void dasd_eckd_handle_cuir(struct dasd_device *device, void *messages, + __u8 lpum) +{ + struct dasd_cuir_message *cuir = messages; + struct channel_path_desc *desc; + struct subchannel_id sch_id; + int pos, response; + ccw_device_get_schid(device->cdev, &sch_id); + + /* get position of path in mask */ + pos = 8 - ffs(lpum); + /* get channel path descriptor from this position */ + desc = ccw_device_get_chp_desc(device->cdev, pos); + + if (cuir->code == CUIR_QUIESCE) { + /* quiesce */ + response = dasd_eckd_cuir_quiesce(device, lpum, desc, sch_id); + } else if (cuir->code == CUIR_RESUME) { + /* resume */ + response = dasd_eckd_cuir_resume(device, lpum, desc, sch_id); + } else + response = PSF_CUIR_NOT_SUPPORTED; + + dasd_eckd_psf_cuir_response(device, response, cuir->message_id, + desc, sch_id); + + /* free descriptor copy */ + kfree(desc); +} + +static void dasd_eckd_check_attention_work(struct work_struct *work) +{ + struct check_attention_work_data *data; + struct dasd_rssd_messages *messages; + struct dasd_device *device; + int rc; + + data = container_of(work, struct check_attention_work_data, worker); + device = data->device; + + messages = kzalloc(sizeof(*messages), GFP_KERNEL); + if (!messages) { + DBF_DEV_EVENT(DBF_WARNING, device, "%s", + "Could not allocate attention message buffer"); + goto out; + } + + rc = dasd_eckd_read_message_buffer(device, messages, data->lpum); + if (rc) + goto out; + + if (messages->length == ATTENTION_LENGTH_CUIR && + messages->format == ATTENTION_FORMAT_CUIR) + dasd_eckd_handle_cuir(device, messages, data->lpum); + +out: + dasd_put_device(device); + kfree(messages); + kfree(data); +} + +static int dasd_eckd_check_attention(struct dasd_device *device, __u8 lpum) +{ + struct check_attention_work_data *data; + + data = kzalloc(sizeof(*data), GFP_ATOMIC); + if (!data) + return -ENOMEM; + INIT_WORK(&data->worker, dasd_eckd_check_attention_work); + dasd_get_device(device); + data->device = device; + data->lpum = lpum; + schedule_work(&data->worker); + return 0; +} + static struct ccw_driver dasd_eckd_driver = { .driver = { .name = "dasd-eckd", @@ -4539,6 +4906,7 @@ static struct dasd_discipline dasd_eckd_discipline = { .reload = dasd_eckd_reload_device, .get_uid = dasd_eckd_get_uid, .kick_validate = dasd_eckd_kick_validate_server, + .check_attention = dasd_eckd_check_attention, }; static int __init diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h index 2555e494591f..ddab7df36e25 100644 --- a/drivers/s390/block/dasd_eckd.h +++ b/drivers/s390/block/dasd_eckd.h @@ -51,8 +51,35 @@ /* * Perform Subsystem Function / Sub-Orders */ -#define PSF_ORDER_PRSSD 0x18 -#define PSF_ORDER_SSC 0x1D +#define PSF_ORDER_PRSSD 0x18 +#define PSF_ORDER_CUIR_RESPONSE 0x1A +#define PSF_ORDER_SSC 0x1D + +/* + * CUIR response condition codes + */ +#define PSF_CUIR_INVALID 0x00 +#define PSF_CUIR_COMPLETED 0x01 +#define PSF_CUIR_NOT_SUPPORTED 0x02 +#define PSF_CUIR_ERROR_IN_REQ 0x03 +#define PSF_CUIR_DENIED 0x04 +#define PSF_CUIR_LAST_PATH 0x05 +#define PSF_CUIR_DEVICE_ONLINE 0x06 +#define PSF_CUIR_VARY_FAILURE 0x07 +#define PSF_CUIR_SOFTWARE_FAILURE 0x08 +#define PSF_CUIR_NOT_RECOGNIZED 0x09 + +/* + * CUIR codes + */ +#define CUIR_QUIESCE 0x01 +#define CUIR_RESUME 0x02 + +/* + * attention message definitions + */ +#define ATTENTION_LENGTH_CUIR 0x0e +#define ATTENTION_FORMAT_CUIR 0x01 /* * Size that is reportet for large volumes in the old 16-bit no_cyl field @@ -342,6 +369,38 @@ struct dasd_rssd_features { char feature[256]; } __attribute__((packed)); +struct dasd_rssd_messages { + __u16 length; + __u8 format; + __u8 code; + __u32 message_id; + __u8 flags; + char messages[4087]; +} __packed; + +struct dasd_cuir_message { + __u16 length; + __u8 format; + __u8 code; + __u32 message_id; + __u8 flags; + __u8 neq_map[3]; + __u8 ned_map; + __u8 record_selector; +} __packed; + +struct dasd_psf_cuir_response { + __u8 order; + __u8 flags; + __u8 cc; + __u8 chpid; + __u16 device_nr; + __u16 reserved; + __u32 message_id; + __u64 system_id; + __u8 cssid; + __u8 ssid; +} __packed; /* * Perform Subsystem Function - Prepare for Read Subsystem Data diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index c20170166909..8b5d4100abf7 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -357,6 +357,7 @@ struct dasd_discipline { int (*get_uid) (struct dasd_device *, struct dasd_uid *); void (*kick_validate) (struct dasd_device *); + int (*check_attention)(struct dasd_device *, __u8); }; extern struct dasd_discipline *dasd_diag_discipline_pointer; @@ -382,6 +383,10 @@ struct dasd_path { __u8 tbvpm; __u8 ppm; __u8 npm; + /* paths that are not used because of a special condition */ + __u8 cablepm; /* miss-cabled */ + __u8 hpfpm; /* the HPF requirements of the other paths are not met */ + __u8 cuirpm; /* CUIR varied offline */ }; struct dasd_profile_info { @@ -501,7 +506,10 @@ struct dasd_block { struct dasd_profile profile; }; - +struct dasd_attention_data { + struct dasd_device *device; + __u8 lpum; +}; /* reasons why device (ccw_device_start) was stopped */ #define DASD_STOPPED_NOT_ACC 1 /* not accessible */ diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index 71bf959732fe..dc24ecfac2d1 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -102,6 +102,19 @@ config SCLP_ASYNC want for inform other people about your kernel panics, need this feature and intend to run your kernel in LPAR. +config HMC_DRV + def_tristate m + prompt "Support for file transfers from HMC drive CD/DVD-ROM" + depends on 64BIT + select CRC16 + help + This option enables support for file transfers from a Hardware + Management Console (HMC) drive CD/DVD-ROM. It is available as a + module, called 'hmcdrv', and also as kernel built-in. There is one + optional parameter for this module: cachesize=N, which modifies the + transfer cache size from it's default value 0.5MB to N bytes. If N + is zero, then no caching is performed. + config S390_TAPE def_tristate m prompt "S/390 tape device support" diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 78b6ace7edcb..6fa9364d1c07 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -33,3 +33,6 @@ obj-$(CONFIG_S390_VMUR) += vmur.o zcore_mod-objs := sclp_sdias.o zcore.o obj-$(CONFIG_CRASH_DUMP) += zcore_mod.o + +hmcdrv-objs := hmcdrv_mod.o hmcdrv_dev.o hmcdrv_ftp.o hmcdrv_cache.o diag_ftp.o sclp_ftp.o +obj-$(CONFIG_HMC_DRV) += hmcdrv.o diff --git a/drivers/s390/char/diag_ftp.c b/drivers/s390/char/diag_ftp.c new file mode 100644 index 000000000000..93889632fdf9 --- /dev/null +++ b/drivers/s390/char/diag_ftp.c @@ -0,0 +1,237 @@ +/* + * DIAGNOSE X'2C4' instruction based HMC FTP services, useable on z/VM + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + * + */ + +#define KMSG_COMPONENT "hmcdrv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/irq.h> +#include <linux/wait.h> +#include <linux/string.h> +#include <asm/ctl_reg.h> + +#include "hmcdrv_ftp.h" +#include "diag_ftp.h" + +/* DIAGNOSE X'2C4' return codes in Ry */ +#define DIAG_FTP_RET_OK 0 /* HMC FTP started successfully */ +#define DIAG_FTP_RET_EBUSY 4 /* HMC FTP service currently busy */ +#define DIAG_FTP_RET_EIO 8 /* HMC FTP service I/O error */ +/* and an artificial extension */ +#define DIAG_FTP_RET_EPERM 2 /* HMC FTP service privilege error */ + +/* FTP service status codes (after INTR at guest real location 133) */ +#define DIAG_FTP_STAT_OK 0U /* request completed successfully */ +#define DIAG_FTP_STAT_PGCC 4U /* program check condition */ +#define DIAG_FTP_STAT_PGIOE 8U /* paging I/O error */ +#define DIAG_FTP_STAT_TIMEOUT 12U /* timeout */ +#define DIAG_FTP_STAT_EBASE 16U /* base of error codes from SCLP */ +#define DIAG_FTP_STAT_LDFAIL (DIAG_FTP_STAT_EBASE + 1U) /* failed */ +#define DIAG_FTP_STAT_LDNPERM (DIAG_FTP_STAT_EBASE + 2U) /* not allowed */ +#define DIAG_FTP_STAT_LDRUNS (DIAG_FTP_STAT_EBASE + 3U) /* runs */ +#define DIAG_FTP_STAT_LDNRUNS (DIAG_FTP_STAT_EBASE + 4U) /* not runs */ + +/** + * struct diag_ftp_ldfpl - load file FTP parameter list (LDFPL) + * @bufaddr: real buffer address (at 4k boundary) + * @buflen: length of buffer + * @offset: dir/file offset + * @intparm: interruption parameter (unused) + * @transferred: bytes transferred + * @fsize: file size, filled on GET + * @failaddr: failing address + * @spare: padding + * @fident: file name - ASCII + */ +struct diag_ftp_ldfpl { + u64 bufaddr; + u64 buflen; + u64 offset; + u64 intparm; + u64 transferred; + u64 fsize; + u64 failaddr; + u64 spare; + u8 fident[HMCDRV_FTP_FIDENT_MAX]; +} __packed; + +static DECLARE_COMPLETION(diag_ftp_rx_complete); +static int diag_ftp_subcode; + +/** + * diag_ftp_handler() - FTP services IRQ handler + * @extirq: external interrupt (sub-) code + * @param32: 32-bit interruption parameter from &struct diag_ftp_ldfpl + * @param64: unused (for 64-bit interrupt parameters) + */ +static void diag_ftp_handler(struct ext_code extirq, + unsigned int param32, + unsigned long param64) +{ + if ((extirq.subcode >> 8) != 8) + return; /* not a FTP services sub-code */ + + inc_irq_stat(IRQEXT_FTP); + diag_ftp_subcode = extirq.subcode & 0xffU; + complete(&diag_ftp_rx_complete); +} + +/** + * diag_ftp_2c4() - DIAGNOSE X'2C4' service call + * @fpl: pointer to prepared LDFPL + * @cmd: FTP command to be executed + * + * Performs a DIAGNOSE X'2C4' call with (input/output) FTP parameter list + * @fpl and FTP function code @cmd. In case of an error the function does + * nothing and returns an (negative) error code. + * + * Notes: + * 1. This function only initiates a transfer, so the caller must wait + * for completion (asynchronous execution). + * 2. The FTP parameter list @fpl must be aligned to a double-word boundary. + * 3. fpl->bufaddr must be a real address, 4k aligned + */ +static int diag_ftp_2c4(struct diag_ftp_ldfpl *fpl, + enum hmcdrv_ftp_cmdid cmd) +{ + int rc; + + asm volatile( + " diag %[addr],%[cmd],0x2c4\n" + "0: j 2f\n" + "1: la %[rc],%[err]\n" + "2:\n" + EX_TABLE(0b, 1b) + : [rc] "=d" (rc), "+m" (*fpl) + : [cmd] "0" (cmd), [addr] "d" (virt_to_phys(fpl)), + [err] "i" (DIAG_FTP_RET_EPERM) + : "cc"); + + switch (rc) { + case DIAG_FTP_RET_OK: + return 0; + case DIAG_FTP_RET_EBUSY: + return -EBUSY; + case DIAG_FTP_RET_EPERM: + return -EPERM; + case DIAG_FTP_RET_EIO: + default: + return -EIO; + } +} + +/** + * diag_ftp_cmd() - executes a DIAG X'2C4' FTP command, targeting a HMC + * @ftp: pointer to FTP command specification + * @fsize: return of file size (or NULL if undesirable) + * + * Attention: Notice that this function is not reentrant - so the caller + * must ensure locking. + * + * Return: number of bytes read/written or a (negative) error code + */ +ssize_t diag_ftp_cmd(const struct hmcdrv_ftp_cmdspec *ftp, size_t *fsize) +{ + struct diag_ftp_ldfpl *ldfpl; + ssize_t len; +#ifdef DEBUG + unsigned long start_jiffies; + + pr_debug("starting DIAG X'2C4' on '%s', requesting %zd bytes\n", + ftp->fname, ftp->len); + start_jiffies = jiffies; +#endif + init_completion(&diag_ftp_rx_complete); + + ldfpl = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!ldfpl) { + len = -ENOMEM; + goto out; + } + + len = strlcpy(ldfpl->fident, ftp->fname, sizeof(ldfpl->fident)); + if (len >= HMCDRV_FTP_FIDENT_MAX) { + len = -EINVAL; + goto out_free; + } + + ldfpl->transferred = 0; + ldfpl->fsize = 0; + ldfpl->offset = ftp->ofs; + ldfpl->buflen = ftp->len; + ldfpl->bufaddr = virt_to_phys(ftp->buf); + + len = diag_ftp_2c4(ldfpl, ftp->id); + if (len) + goto out_free; + + /* + * There is no way to cancel the running diag X'2C4', the code + * needs to wait unconditionally until the transfer is complete. + */ + wait_for_completion(&diag_ftp_rx_complete); + +#ifdef DEBUG + pr_debug("completed DIAG X'2C4' after %lu ms\n", + (jiffies - start_jiffies) * 1000 / HZ); + pr_debug("status of DIAG X'2C4' is %u, with %lld/%lld bytes\n", + diag_ftp_subcode, ldfpl->transferred, ldfpl->fsize); +#endif + + switch (diag_ftp_subcode) { + case DIAG_FTP_STAT_OK: /* success */ + len = ldfpl->transferred; + if (fsize) + *fsize = ldfpl->fsize; + break; + case DIAG_FTP_STAT_LDNPERM: + len = -EPERM; + break; + case DIAG_FTP_STAT_LDRUNS: + len = -EBUSY; + break; + case DIAG_FTP_STAT_LDFAIL: + len = -ENOENT; /* no such file or media */ + break; + default: + len = -EIO; + break; + } + +out_free: + free_page((unsigned long) ldfpl); +out: + return len; +} + +/** + * diag_ftp_startup() - startup of FTP services, when running on z/VM + * + * Return: 0 on success, else an (negative) error code + */ +int diag_ftp_startup(void) +{ + int rc; + + rc = register_external_irq(EXT_IRQ_CP_SERVICE, diag_ftp_handler); + if (rc) + return rc; + + ctl_set_bit(0, 63 - 22); + return 0; +} + +/** + * diag_ftp_shutdown() - shutdown of FTP services, when running on z/VM + */ +void diag_ftp_shutdown(void) +{ + ctl_clear_bit(0, 63 - 22); + unregister_external_irq(EXT_IRQ_CP_SERVICE, diag_ftp_handler); +} diff --git a/drivers/s390/char/diag_ftp.h b/drivers/s390/char/diag_ftp.h new file mode 100644 index 000000000000..3abd2614053a --- /dev/null +++ b/drivers/s390/char/diag_ftp.h @@ -0,0 +1,21 @@ +/* + * DIAGNOSE X'2C4' instruction based SE/HMC FTP Services, useable on z/VM + * + * Notice that all functions exported here are not reentrant. + * So usage should be exclusive, ensured by the caller (e.g. using a + * mutex). + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#ifndef __DIAG_FTP_H__ +#define __DIAG_FTP_H__ + +#include "hmcdrv_ftp.h" + +int diag_ftp_startup(void); +void diag_ftp_shutdown(void); +ssize_t diag_ftp_cmd(const struct hmcdrv_ftp_cmdspec *ftp, size_t *fsize); + +#endif /* __DIAG_FTP_H__ */ diff --git a/drivers/s390/char/hmcdrv_cache.c b/drivers/s390/char/hmcdrv_cache.c new file mode 100644 index 000000000000..4cda5ada143a --- /dev/null +++ b/drivers/s390/char/hmcdrv_cache.c @@ -0,0 +1,252 @@ +/* + * SE/HMC Drive (Read) Cache Functions + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + * + */ + +#define KMSG_COMPONENT "hmcdrv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/jiffies.h> + +#include "hmcdrv_ftp.h" +#include "hmcdrv_cache.h" + +#define HMCDRV_CACHE_TIMEOUT 30 /* aging timeout in seconds */ + +/** + * struct hmcdrv_cache_entry - file cache (only used on read/dir) + * @id: FTP command ID + * @content: kernel-space buffer, 4k aligned + * @len: size of @content cache (0 if caching disabled) + * @ofs: start of content within file (-1 if no cached content) + * @fname: file name + * @fsize: file size + * @timeout: cache timeout in jiffies + * + * Notice that the first three members (id, fname, fsize) are cached on all + * read/dir requests. But content is cached only under some preconditions. + * Uncached content is signalled by a negative value of @ofs. + */ +struct hmcdrv_cache_entry { + enum hmcdrv_ftp_cmdid id; + char fname[HMCDRV_FTP_FIDENT_MAX]; + size_t fsize; + loff_t ofs; + unsigned long timeout; + void *content; + size_t len; +}; + +static int hmcdrv_cache_order; /* cache allocated page order */ + +static struct hmcdrv_cache_entry hmcdrv_cache_file = { + .fsize = SIZE_MAX, + .ofs = -1, + .len = 0, + .fname = {'\0'} +}; + +/** + * hmcdrv_cache_get() - looks for file data/content in read cache + * @ftp: pointer to FTP command specification + * + * Return: number of bytes read from cache or a negative number if nothing + * in content cache (for the file/cmd specified in @ftp) + */ +static ssize_t hmcdrv_cache_get(const struct hmcdrv_ftp_cmdspec *ftp) +{ + loff_t pos; /* position in cache (signed) */ + ssize_t len; + + if ((ftp->id != hmcdrv_cache_file.id) || + strcmp(hmcdrv_cache_file.fname, ftp->fname)) + return -1; + + if (ftp->ofs >= hmcdrv_cache_file.fsize) /* EOF ? */ + return 0; + + if ((hmcdrv_cache_file.ofs < 0) || /* has content? */ + time_after(jiffies, hmcdrv_cache_file.timeout)) + return -1; + + /* there seems to be cached content - calculate the maximum number + * of bytes that can be returned (regarding file size and offset) + */ + len = hmcdrv_cache_file.fsize - ftp->ofs; + + if (len > ftp->len) + len = ftp->len; + + /* check if the requested chunk falls into our cache (which starts + * at offset 'hmcdrv_cache_file.ofs' in the file of interest) + */ + pos = ftp->ofs - hmcdrv_cache_file.ofs; + + if ((pos >= 0) && + ((pos + len) <= hmcdrv_cache_file.len)) { + + memcpy(ftp->buf, + hmcdrv_cache_file.content + pos, + len); + pr_debug("using cached content of '%s', returning %zd/%zd bytes\n", + hmcdrv_cache_file.fname, len, + hmcdrv_cache_file.fsize); + + return len; + } + + return -1; +} + +/** + * hmcdrv_cache_do() - do a HMC drive CD/DVD transfer with cache update + * @ftp: pointer to FTP command specification + * @func: FTP transfer function to be used + * + * Return: number of bytes read/written or a (negative) error code + */ +static ssize_t hmcdrv_cache_do(const struct hmcdrv_ftp_cmdspec *ftp, + hmcdrv_cache_ftpfunc func) +{ + ssize_t len; + + /* only cache content if the read/dir cache really exists + * (hmcdrv_cache_file.len > 0), is large enough to handle the + * request (hmcdrv_cache_file.len >= ftp->len) and there is a need + * to do so (ftp->len > 0) + */ + if ((ftp->len > 0) && (hmcdrv_cache_file.len >= ftp->len)) { + + /* because the cache is not located at ftp->buf, we have to + * assemble a new HMC drive FTP cmd specification (pointing + * to our cache, and using the increased size) + */ + struct hmcdrv_ftp_cmdspec cftp = *ftp; /* make a copy */ + cftp.buf = hmcdrv_cache_file.content; /* and update */ + cftp.len = hmcdrv_cache_file.len; /* buffer data */ + + len = func(&cftp, &hmcdrv_cache_file.fsize); /* now do */ + + if (len > 0) { + pr_debug("caching %zd bytes content for '%s'\n", + len, ftp->fname); + + if (len > ftp->len) + len = ftp->len; + + hmcdrv_cache_file.ofs = ftp->ofs; + hmcdrv_cache_file.timeout = jiffies + + HMCDRV_CACHE_TIMEOUT * HZ; + memcpy(ftp->buf, hmcdrv_cache_file.content, len); + } + } else { + len = func(ftp, &hmcdrv_cache_file.fsize); + hmcdrv_cache_file.ofs = -1; /* invalidate content */ + } + + if (len > 0) { + /* cache some file info (FTP command, file name and file + * size) unconditionally + */ + strlcpy(hmcdrv_cache_file.fname, ftp->fname, + HMCDRV_FTP_FIDENT_MAX); + hmcdrv_cache_file.id = ftp->id; + pr_debug("caching cmd %d, file size %zu for '%s'\n", + ftp->id, hmcdrv_cache_file.fsize, ftp->fname); + } + + return len; +} + +/** + * hmcdrv_cache_cmd() - perform a cached HMC drive CD/DVD transfer + * @ftp: pointer to FTP command specification + * @func: FTP transfer function to be used + * + * Attention: Notice that this function is not reentrant - so the caller + * must ensure exclusive execution. + * + * Return: number of bytes read/written or a (negative) error code + */ +ssize_t hmcdrv_cache_cmd(const struct hmcdrv_ftp_cmdspec *ftp, + hmcdrv_cache_ftpfunc func) +{ + ssize_t len; + + if ((ftp->id == HMCDRV_FTP_DIR) || /* read cache */ + (ftp->id == HMCDRV_FTP_NLIST) || + (ftp->id == HMCDRV_FTP_GET)) { + + len = hmcdrv_cache_get(ftp); + + if (len >= 0) /* got it from cache ? */ + return len; /* yes */ + + len = hmcdrv_cache_do(ftp, func); + + if (len >= 0) + return len; + + } else { + len = func(ftp, NULL); /* simply do original command */ + } + + /* invalidate the (read) cache in case there was a write operation + * or an error on read/dir + */ + hmcdrv_cache_file.id = HMCDRV_FTP_NOOP; + hmcdrv_cache_file.fsize = LLONG_MAX; + hmcdrv_cache_file.ofs = -1; + + return len; +} + +/** + * hmcdrv_cache_startup() - startup of HMC drive cache + * @cachesize: cache size + * + * Return: 0 on success, else a (negative) error code + */ +int hmcdrv_cache_startup(size_t cachesize) +{ + if (cachesize > 0) { /* perform caching ? */ + hmcdrv_cache_order = get_order(cachesize); + hmcdrv_cache_file.content = + (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, + hmcdrv_cache_order); + + if (!hmcdrv_cache_file.content) { + pr_err("Allocating the requested cache size of %zu bytes failed\n", + cachesize); + return -ENOMEM; + } + + pr_debug("content cache enabled, size is %zu bytes\n", + cachesize); + } + + hmcdrv_cache_file.len = cachesize; + return 0; +} + +/** + * hmcdrv_cache_shutdown() - shutdown of HMC drive cache + */ +void hmcdrv_cache_shutdown(void) +{ + if (hmcdrv_cache_file.content) { + free_pages((unsigned long) hmcdrv_cache_file.content, + hmcdrv_cache_order); + hmcdrv_cache_file.content = NULL; + } + + hmcdrv_cache_file.id = HMCDRV_FTP_NOOP; + hmcdrv_cache_file.fsize = LLONG_MAX; + hmcdrv_cache_file.ofs = -1; + hmcdrv_cache_file.len = 0; /* no cache */ +} diff --git a/drivers/s390/char/hmcdrv_cache.h b/drivers/s390/char/hmcdrv_cache.h new file mode 100644 index 000000000000..a14b57526781 --- /dev/null +++ b/drivers/s390/char/hmcdrv_cache.h @@ -0,0 +1,24 @@ +/* + * SE/HMC Drive (Read) Cache Functions + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#ifndef __HMCDRV_CACHE_H__ +#define __HMCDRV_CACHE_H__ + +#include <linux/mmzone.h> +#include "hmcdrv_ftp.h" + +#define HMCDRV_CACHE_SIZE_DFLT (MAX_ORDER_NR_PAGES * PAGE_SIZE / 2UL) + +typedef ssize_t (*hmcdrv_cache_ftpfunc)(const struct hmcdrv_ftp_cmdspec *ftp, + size_t *fsize); + +ssize_t hmcdrv_cache_cmd(const struct hmcdrv_ftp_cmdspec *ftp, + hmcdrv_cache_ftpfunc func); +int hmcdrv_cache_startup(size_t cachesize); +void hmcdrv_cache_shutdown(void); + +#endif /* __HMCDRV_CACHE_H__ */ diff --git a/drivers/s390/char/hmcdrv_dev.c b/drivers/s390/char/hmcdrv_dev.c new file mode 100644 index 000000000000..0c5176179c17 --- /dev/null +++ b/drivers/s390/char/hmcdrv_dev.c @@ -0,0 +1,370 @@ +/* + * HMC Drive CD/DVD Device + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + * + * This file provides a Linux "misc" character device for access to an + * assigned HMC drive CD/DVD-ROM. It works as follows: First create the + * device by calling hmcdrv_dev_init(). After open() a lseek(fd, 0, + * SEEK_END) indicates that a new FTP command follows (not needed on the + * first command after open). Then write() the FTP command ASCII string + * to it, e.g. "dir /" or "nls <directory>" or "get <filename>". At the + * end read() the response. + */ + +#define KMSG_COMPONENT "hmcdrv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/miscdevice.h> +#include <linux/device.h> +#include <linux/capability.h> +#include <linux/delay.h> +#include <linux/uaccess.h> + +#include "hmcdrv_dev.h" +#include "hmcdrv_ftp.h" + +/* If the following macro is defined, then the HMC device creates it's own + * separated device class (and dynamically assigns a major number). If not + * defined then the HMC device is assigned to the "misc" class devices. + * +#define HMCDRV_DEV_CLASS "hmcftp" + */ + +#define HMCDRV_DEV_NAME "hmcdrv" +#define HMCDRV_DEV_BUSY_DELAY 500 /* delay between -EBUSY trials in ms */ +#define HMCDRV_DEV_BUSY_RETRIES 3 /* number of retries on -EBUSY */ + +struct hmcdrv_dev_node { + +#ifdef HMCDRV_DEV_CLASS + struct cdev dev; /* character device structure */ + umode_t mode; /* mode of device node (unused, zero) */ +#else + struct miscdevice dev; /* "misc" device structure */ +#endif + +}; + +static int hmcdrv_dev_open(struct inode *inode, struct file *fp); +static int hmcdrv_dev_release(struct inode *inode, struct file *fp); +static loff_t hmcdrv_dev_seek(struct file *fp, loff_t pos, int whence); +static ssize_t hmcdrv_dev_read(struct file *fp, char __user *ubuf, + size_t len, loff_t *pos); +static ssize_t hmcdrv_dev_write(struct file *fp, const char __user *ubuf, + size_t len, loff_t *pos); +static ssize_t hmcdrv_dev_transfer(char __kernel *cmd, loff_t offset, + char __user *buf, size_t len); + +/* + * device operations + */ +static const struct file_operations hmcdrv_dev_fops = { + .open = hmcdrv_dev_open, + .llseek = hmcdrv_dev_seek, + .release = hmcdrv_dev_release, + .read = hmcdrv_dev_read, + .write = hmcdrv_dev_write, +}; + +static struct hmcdrv_dev_node hmcdrv_dev; /* HMC device struct (static) */ + +#ifdef HMCDRV_DEV_CLASS + +static struct class *hmcdrv_dev_class; /* device class pointer */ +static dev_t hmcdrv_dev_no; /* device number (major/minor) */ + +/** + * hmcdrv_dev_name() - provides a naming hint for a device node in /dev + * @dev: device for which the naming/mode hint is + * @mode: file mode for device node created in /dev + * + * See: devtmpfs.c, function devtmpfs_create_node() + * + * Return: recommended device file name in /dev + */ +static char *hmcdrv_dev_name(struct device *dev, umode_t *mode) +{ + char *nodename = NULL; + const char *devname = dev_name(dev); /* kernel device name */ + + if (devname) + nodename = kasprintf(GFP_KERNEL, "%s", devname); + + /* on device destroy (rmmod) the mode pointer may be NULL + */ + if (mode) + *mode = hmcdrv_dev.mode; + + return nodename; +} + +#endif /* HMCDRV_DEV_CLASS */ + +/* + * open() + */ +static int hmcdrv_dev_open(struct inode *inode, struct file *fp) +{ + int rc; + + /* check for non-blocking access, which is really unsupported + */ + if (fp->f_flags & O_NONBLOCK) + return -EINVAL; + + /* Because it makes no sense to open this device read-only (then a + * FTP command cannot be emitted), we respond with an error. + */ + if ((fp->f_flags & O_ACCMODE) == O_RDONLY) + return -EINVAL; + + /* prevent unloading this module as long as anyone holds the + * device file open - so increment the reference count here + */ + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + fp->private_data = NULL; /* no command yet */ + rc = hmcdrv_ftp_startup(); + if (rc) + module_put(THIS_MODULE); + + pr_debug("open file '/dev/%s' with return code %d\n", + fp->f_dentry->d_name.name, rc); + return rc; +} + +/* + * release() + */ +static int hmcdrv_dev_release(struct inode *inode, struct file *fp) +{ + pr_debug("closing file '/dev/%s'\n", fp->f_dentry->d_name.name); + kfree(fp->private_data); + fp->private_data = NULL; + hmcdrv_ftp_shutdown(); + module_put(THIS_MODULE); + return 0; +} + +/* + * lseek() + */ +static loff_t hmcdrv_dev_seek(struct file *fp, loff_t pos, int whence) +{ + switch (whence) { + case SEEK_CUR: /* relative to current file position */ + pos += fp->f_pos; /* new position stored in 'pos' */ + break; + + case SEEK_SET: /* absolute (relative to beginning of file) */ + break; /* SEEK_SET */ + + /* We use SEEK_END as a special indicator for a SEEK_SET + * (set absolute position), combined with a FTP command + * clear. + */ + case SEEK_END: + if (fp->private_data) { + kfree(fp->private_data); + fp->private_data = NULL; + } + + break; /* SEEK_END */ + + default: /* SEEK_DATA, SEEK_HOLE: unsupported */ + return -EINVAL; + } + + if (pos < 0) + return -EINVAL; + + if (fp->f_pos != pos) + ++fp->f_version; + + fp->f_pos = pos; + return pos; +} + +/* + * transfer (helper function) + */ +static ssize_t hmcdrv_dev_transfer(char __kernel *cmd, loff_t offset, + char __user *buf, size_t len) +{ + ssize_t retlen; + unsigned trials = HMCDRV_DEV_BUSY_RETRIES; + + do { + retlen = hmcdrv_ftp_cmd(cmd, offset, buf, len); + + if (retlen != -EBUSY) + break; + + msleep(HMCDRV_DEV_BUSY_DELAY); + + } while (--trials > 0); + + return retlen; +} + +/* + * read() + */ +static ssize_t hmcdrv_dev_read(struct file *fp, char __user *ubuf, + size_t len, loff_t *pos) +{ + ssize_t retlen; + + if (((fp->f_flags & O_ACCMODE) == O_WRONLY) || + (fp->private_data == NULL)) { /* no FTP cmd defined ? */ + return -EBADF; + } + + retlen = hmcdrv_dev_transfer((char *) fp->private_data, + *pos, ubuf, len); + + pr_debug("read from file '/dev/%s' at %lld returns %zd/%zu\n", + fp->f_dentry->d_name.name, (long long) *pos, retlen, len); + + if (retlen > 0) + *pos += retlen; + + return retlen; +} + +/* + * write() + */ +static ssize_t hmcdrv_dev_write(struct file *fp, const char __user *ubuf, + size_t len, loff_t *pos) +{ + ssize_t retlen; + + pr_debug("writing file '/dev/%s' at pos. %lld with length %zd\n", + fp->f_dentry->d_name.name, (long long) *pos, len); + + if (!fp->private_data) { /* first expect a cmd write */ + fp->private_data = kmalloc(len + 1, GFP_KERNEL); + + if (!fp->private_data) + return -ENOMEM; + + if (!copy_from_user(fp->private_data, ubuf, len)) { + ((char *)fp->private_data)[len] = '\0'; + return len; + } + + kfree(fp->private_data); + fp->private_data = NULL; + return -EFAULT; + } + + retlen = hmcdrv_dev_transfer((char *) fp->private_data, + *pos, (char __user *) ubuf, len); + if (retlen > 0) + *pos += retlen; + + pr_debug("write to file '/dev/%s' returned %zd\n", + fp->f_dentry->d_name.name, retlen); + + return retlen; +} + +/** + * hmcdrv_dev_init() - creates a HMC drive CD/DVD device + * + * This function creates a HMC drive CD/DVD kernel device and an associated + * device under /dev, using a dynamically allocated major number. + * + * Return: 0 on success, else an error code. + */ +int hmcdrv_dev_init(void) +{ + int rc; + +#ifdef HMCDRV_DEV_CLASS + struct device *dev; + + rc = alloc_chrdev_region(&hmcdrv_dev_no, 0, 1, HMCDRV_DEV_NAME); + + if (rc) + goto out_err; + + cdev_init(&hmcdrv_dev.dev, &hmcdrv_dev_fops); + hmcdrv_dev.dev.owner = THIS_MODULE; + rc = cdev_add(&hmcdrv_dev.dev, hmcdrv_dev_no, 1); + + if (rc) + goto out_unreg; + + /* At this point the character device exists in the kernel (see + * /proc/devices), but not under /dev nor /sys/devices/virtual. So + * we have to create an associated class (see /sys/class). + */ + hmcdrv_dev_class = class_create(THIS_MODULE, HMCDRV_DEV_CLASS); + + if (IS_ERR(hmcdrv_dev_class)) { + rc = PTR_ERR(hmcdrv_dev_class); + goto out_devdel; + } + + /* Finally a device node in /dev has to be established (as 'mkdev' + * does from the command line). Notice that assignment of a device + * node name/mode function is optional (only for mode != 0600). + */ + hmcdrv_dev.mode = 0; /* "unset" */ + hmcdrv_dev_class->devnode = hmcdrv_dev_name; + + dev = device_create(hmcdrv_dev_class, NULL, hmcdrv_dev_no, NULL, + "%s", HMCDRV_DEV_NAME); + if (!IS_ERR(dev)) + return 0; + + rc = PTR_ERR(dev); + class_destroy(hmcdrv_dev_class); + hmcdrv_dev_class = NULL; + +out_devdel: + cdev_del(&hmcdrv_dev.dev); + +out_unreg: + unregister_chrdev_region(hmcdrv_dev_no, 1); + +out_err: + +#else /* !HMCDRV_DEV_CLASS */ + hmcdrv_dev.dev.minor = MISC_DYNAMIC_MINOR; + hmcdrv_dev.dev.name = HMCDRV_DEV_NAME; + hmcdrv_dev.dev.fops = &hmcdrv_dev_fops; + hmcdrv_dev.dev.mode = 0; /* finally produces 0600 */ + rc = misc_register(&hmcdrv_dev.dev); +#endif /* HMCDRV_DEV_CLASS */ + + return rc; +} + +/** + * hmcdrv_dev_exit() - destroys a HMC drive CD/DVD device + */ +void hmcdrv_dev_exit(void) +{ +#ifdef HMCDRV_DEV_CLASS + if (!IS_ERR_OR_NULL(hmcdrv_dev_class)) { + device_destroy(hmcdrv_dev_class, hmcdrv_dev_no); + class_destroy(hmcdrv_dev_class); + } + + cdev_del(&hmcdrv_dev.dev); + unregister_chrdev_region(hmcdrv_dev_no, 1); +#else /* !HMCDRV_DEV_CLASS */ + misc_deregister(&hmcdrv_dev.dev); +#endif /* HMCDRV_DEV_CLASS */ +} diff --git a/drivers/s390/char/hmcdrv_dev.h b/drivers/s390/char/hmcdrv_dev.h new file mode 100644 index 000000000000..cb17f07e02de --- /dev/null +++ b/drivers/s390/char/hmcdrv_dev.h @@ -0,0 +1,14 @@ +/* + * SE/HMC Drive FTP Device + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#ifndef __HMCDRV_DEV_H__ +#define __HMCDRV_DEV_H__ + +int hmcdrv_dev_init(void); +void hmcdrv_dev_exit(void); + +#endif /* __HMCDRV_DEV_H__ */ diff --git a/drivers/s390/char/hmcdrv_ftp.c b/drivers/s390/char/hmcdrv_ftp.c new file mode 100644 index 000000000000..4bd63322fc29 --- /dev/null +++ b/drivers/s390/char/hmcdrv_ftp.c @@ -0,0 +1,343 @@ +/* + * HMC Drive FTP Services + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#define KMSG_COMPONENT "hmcdrv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/export.h> + +#include <linux/ctype.h> +#include <linux/crc16.h> + +#include "hmcdrv_ftp.h" +#include "hmcdrv_cache.h" +#include "sclp_ftp.h" +#include "diag_ftp.h" + +/** + * struct hmcdrv_ftp_ops - HMC drive FTP operations + * @startup: startup function + * @shutdown: shutdown function + * @cmd: FTP transfer function + */ +struct hmcdrv_ftp_ops { + int (*startup)(void); + void (*shutdown)(void); + ssize_t (*transfer)(const struct hmcdrv_ftp_cmdspec *ftp, + size_t *fsize); +}; + +static enum hmcdrv_ftp_cmdid hmcdrv_ftp_cmd_getid(const char *cmd, int len); +static int hmcdrv_ftp_parse(char *cmd, struct hmcdrv_ftp_cmdspec *ftp); + +static struct hmcdrv_ftp_ops *hmcdrv_ftp_funcs; /* current operations */ +static DEFINE_MUTEX(hmcdrv_ftp_mutex); /* mutex for hmcdrv_ftp_funcs */ +static unsigned hmcdrv_ftp_refcnt; /* start/shutdown reference counter */ + +/** + * hmcdrv_ftp_cmd_getid() - determine FTP command ID from a command string + * @cmd: FTP command string (NOT zero-terminated) + * @len: length of FTP command string in @cmd + */ +static enum hmcdrv_ftp_cmdid hmcdrv_ftp_cmd_getid(const char *cmd, int len) +{ + /* HMC FTP command descriptor */ + struct hmcdrv_ftp_cmd_desc { + const char *str; /* command string */ + enum hmcdrv_ftp_cmdid cmd; /* associated command as enum */ + }; + + /* Description of all HMC drive FTP commands + * + * Notes: + * 1. Array size should be a prime number. + * 2. Do not change the order of commands in table (because the + * index is determined by CRC % ARRAY_SIZE). + * 3. Original command 'nlist' was renamed, else the CRC would + * collide with 'append' (see point 2). + */ + static const struct hmcdrv_ftp_cmd_desc ftpcmds[7] = { + + {.str = "get", /* [0] get (CRC = 0x68eb) */ + .cmd = HMCDRV_FTP_GET}, + {.str = "dir", /* [1] dir (CRC = 0x6a9e) */ + .cmd = HMCDRV_FTP_DIR}, + {.str = "delete", /* [2] delete (CRC = 0x53ae) */ + .cmd = HMCDRV_FTP_DELETE}, + {.str = "nls", /* [3] nls (CRC = 0xf87c) */ + .cmd = HMCDRV_FTP_NLIST}, + {.str = "put", /* [4] put (CRC = 0xac56) */ + .cmd = HMCDRV_FTP_PUT}, + {.str = "append", /* [5] append (CRC = 0xf56e) */ + .cmd = HMCDRV_FTP_APPEND}, + {.str = NULL} /* [6] unused */ + }; + + const struct hmcdrv_ftp_cmd_desc *pdesc; + + u16 crc = 0xffffU; + + if (len == 0) + return HMCDRV_FTP_NOOP; /* error indiactor */ + + crc = crc16(crc, cmd, len); + pdesc = ftpcmds + (crc % ARRAY_SIZE(ftpcmds)); + pr_debug("FTP command '%s' has CRC 0x%04x, at table pos. %lu\n", + cmd, crc, (crc % ARRAY_SIZE(ftpcmds))); + + if (!pdesc->str || strncmp(pdesc->str, cmd, len)) + return HMCDRV_FTP_NOOP; + + pr_debug("FTP command '%s' found, with ID %d\n", + pdesc->str, pdesc->cmd); + + return pdesc->cmd; +} + +/** + * hmcdrv_ftp_parse() - HMC drive FTP command parser + * @cmd: FTP command string "<cmd> <filename>" + * @ftp: Pointer to FTP command specification buffer (output) + * + * Return: 0 on success, else a (negative) error code + */ +static int hmcdrv_ftp_parse(char *cmd, struct hmcdrv_ftp_cmdspec *ftp) +{ + char *start; + int argc = 0; + + ftp->id = HMCDRV_FTP_NOOP; + ftp->fname = NULL; + + while (*cmd != '\0') { + + while (isspace(*cmd)) + ++cmd; + + if (*cmd == '\0') + break; + + start = cmd; + + switch (argc) { + case 0: /* 1st argument (FTP command) */ + while ((*cmd != '\0') && !isspace(*cmd)) + ++cmd; + ftp->id = hmcdrv_ftp_cmd_getid(start, cmd - start); + break; + case 1: /* 2nd / last argument (rest of line) */ + while ((*cmd != '\0') && !iscntrl(*cmd)) + ++cmd; + ftp->fname = start; + /* fall through */ + default: + *cmd = '\0'; + break; + } /* switch */ + + ++argc; + } /* while */ + + if (!ftp->fname || (ftp->id == HMCDRV_FTP_NOOP)) + return -EINVAL; + + return 0; +} + +/** + * hmcdrv_ftp_do() - perform a HMC drive FTP, with data from kernel-space + * @ftp: pointer to FTP command specification + * + * Return: number of bytes read/written or a negative error code + */ +ssize_t hmcdrv_ftp_do(const struct hmcdrv_ftp_cmdspec *ftp) +{ + ssize_t len; + + mutex_lock(&hmcdrv_ftp_mutex); + + if (hmcdrv_ftp_funcs && hmcdrv_ftp_refcnt) { + pr_debug("starting transfer, cmd %d for '%s' at %lld with %zd bytes\n", + ftp->id, ftp->fname, (long long) ftp->ofs, ftp->len); + len = hmcdrv_cache_cmd(ftp, hmcdrv_ftp_funcs->transfer); + } else { + len = -ENXIO; + } + + mutex_unlock(&hmcdrv_ftp_mutex); + return len; +} +EXPORT_SYMBOL(hmcdrv_ftp_do); + +/** + * hmcdrv_ftp_probe() - probe for the HMC drive FTP service + * + * Return: 0 if service is available, else an (negative) error code + */ +int hmcdrv_ftp_probe(void) +{ + int rc; + + struct hmcdrv_ftp_cmdspec ftp = { + .id = HMCDRV_FTP_NOOP, + .ofs = 0, + .fname = "", + .len = PAGE_SIZE + }; + + ftp.buf = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + + if (!ftp.buf) + return -ENOMEM; + + rc = hmcdrv_ftp_startup(); + + if (rc) + return rc; + + rc = hmcdrv_ftp_do(&ftp); + free_page((unsigned long) ftp.buf); + hmcdrv_ftp_shutdown(); + + switch (rc) { + case -ENOENT: /* no such file/media or currently busy, */ + case -EBUSY: /* but service seems to be available */ + rc = 0; + break; + default: /* leave 'rc' as it is for [0, -EPERM, -E...] */ + if (rc > 0) + rc = 0; /* clear length (success) */ + break; + } /* switch */ + + return rc; +} +EXPORT_SYMBOL(hmcdrv_ftp_probe); + +/** + * hmcdrv_ftp_cmd() - Perform a HMC drive FTP, with data from user-space + * + * @cmd: FTP command string "<cmd> <filename>" + * @offset: file position to read/write + * @buf: user-space buffer for read/written directory/file + * @len: size of @buf (read/dir) or number of bytes to write + * + * This function must not be called before hmcdrv_ftp_startup() was called. + * + * Return: number of bytes read/written or a negative error code + */ +ssize_t hmcdrv_ftp_cmd(char __kernel *cmd, loff_t offset, + char __user *buf, size_t len) +{ + int order; + + struct hmcdrv_ftp_cmdspec ftp = {.len = len, .ofs = offset}; + ssize_t retlen = hmcdrv_ftp_parse(cmd, &ftp); + + if (retlen) + return retlen; + + order = get_order(ftp.len); + ftp.buf = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, order); + + if (!ftp.buf) + return -ENOMEM; + + switch (ftp.id) { + case HMCDRV_FTP_DIR: + case HMCDRV_FTP_NLIST: + case HMCDRV_FTP_GET: + retlen = hmcdrv_ftp_do(&ftp); + + if ((retlen >= 0) && + copy_to_user(buf, ftp.buf, retlen)) + retlen = -EFAULT; + break; + + case HMCDRV_FTP_PUT: + case HMCDRV_FTP_APPEND: + if (!copy_from_user(ftp.buf, buf, ftp.len)) + retlen = hmcdrv_ftp_do(&ftp); + else + retlen = -EFAULT; + break; + + case HMCDRV_FTP_DELETE: + retlen = hmcdrv_ftp_do(&ftp); + break; + + default: + retlen = -EOPNOTSUPP; + break; + } + + free_pages((unsigned long) ftp.buf, order); + return retlen; +} + +/** + * hmcdrv_ftp_startup() - startup of HMC drive FTP functionality for a + * dedicated (owner) instance + * + * Return: 0 on success, else an (negative) error code + */ +int hmcdrv_ftp_startup(void) +{ + static struct hmcdrv_ftp_ops hmcdrv_ftp_zvm = { + .startup = diag_ftp_startup, + .shutdown = diag_ftp_shutdown, + .transfer = diag_ftp_cmd + }; + + static struct hmcdrv_ftp_ops hmcdrv_ftp_lpar = { + .startup = sclp_ftp_startup, + .shutdown = sclp_ftp_shutdown, + .transfer = sclp_ftp_cmd + }; + + int rc = 0; + + mutex_lock(&hmcdrv_ftp_mutex); /* block transfers while start-up */ + + if (hmcdrv_ftp_refcnt == 0) { + if (MACHINE_IS_VM) + hmcdrv_ftp_funcs = &hmcdrv_ftp_zvm; + else if (MACHINE_IS_LPAR || MACHINE_IS_KVM) + hmcdrv_ftp_funcs = &hmcdrv_ftp_lpar; + else + rc = -EOPNOTSUPP; + + if (hmcdrv_ftp_funcs) + rc = hmcdrv_ftp_funcs->startup(); + } + + if (!rc) + ++hmcdrv_ftp_refcnt; + + mutex_unlock(&hmcdrv_ftp_mutex); + return rc; +} +EXPORT_SYMBOL(hmcdrv_ftp_startup); + +/** + * hmcdrv_ftp_shutdown() - shutdown of HMC drive FTP functionality for a + * dedicated (owner) instance + */ +void hmcdrv_ftp_shutdown(void) +{ + mutex_lock(&hmcdrv_ftp_mutex); + --hmcdrv_ftp_refcnt; + + if ((hmcdrv_ftp_refcnt == 0) && hmcdrv_ftp_funcs) + hmcdrv_ftp_funcs->shutdown(); + + mutex_unlock(&hmcdrv_ftp_mutex); +} +EXPORT_SYMBOL(hmcdrv_ftp_shutdown); diff --git a/drivers/s390/char/hmcdrv_ftp.h b/drivers/s390/char/hmcdrv_ftp.h new file mode 100644 index 000000000000..f3643a7b3676 --- /dev/null +++ b/drivers/s390/char/hmcdrv_ftp.h @@ -0,0 +1,63 @@ +/* + * SE/HMC Drive FTP Services + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#ifndef __HMCDRV_FTP_H__ +#define __HMCDRV_FTP_H__ + +#include <linux/types.h> /* size_t, loff_t */ + +/* + * HMC drive FTP Service max. length of path (w/ EOS) + */ +#define HMCDRV_FTP_FIDENT_MAX 192 + +/** + * enum hmcdrv_ftp_cmdid - HMC drive FTP commands + * @HMCDRV_FTP_NOOP: do nothing (only for probing) + * @HMCDRV_FTP_GET: read a file + * @HMCDRV_FTP_PUT: (over-) write a file + * @HMCDRV_FTP_APPEND: append to a file + * @HMCDRV_FTP_DIR: list directory long (ls -l) + * @HMCDRV_FTP_NLIST: list files, no directories (name list) + * @HMCDRV_FTP_DELETE: delete a file + * @HMCDRV_FTP_CANCEL: cancel operation (SCLP/LPAR only) + */ +enum hmcdrv_ftp_cmdid { + HMCDRV_FTP_NOOP = 0, + HMCDRV_FTP_GET = 1, + HMCDRV_FTP_PUT = 2, + HMCDRV_FTP_APPEND = 3, + HMCDRV_FTP_DIR = 4, + HMCDRV_FTP_NLIST = 5, + HMCDRV_FTP_DELETE = 6, + HMCDRV_FTP_CANCEL = 7 +}; + +/** + * struct hmcdrv_ftp_cmdspec - FTP command specification + * @id: FTP command ID + * @ofs: offset in file + * @fname: filename (ASCII), null-terminated + * @buf: kernel-space transfer data buffer, 4k aligned + * @len: (max) number of bytes to transfer from/to @buf + */ +struct hmcdrv_ftp_cmdspec { + enum hmcdrv_ftp_cmdid id; + loff_t ofs; + const char *fname; + void __kernel *buf; + size_t len; +}; + +int hmcdrv_ftp_startup(void); +void hmcdrv_ftp_shutdown(void); +int hmcdrv_ftp_probe(void); +ssize_t hmcdrv_ftp_do(const struct hmcdrv_ftp_cmdspec *ftp); +ssize_t hmcdrv_ftp_cmd(char __kernel *cmd, loff_t offset, + char __user *buf, size_t len); + +#endif /* __HMCDRV_FTP_H__ */ diff --git a/drivers/s390/char/hmcdrv_mod.c b/drivers/s390/char/hmcdrv_mod.c new file mode 100644 index 000000000000..505c6a78ee1a --- /dev/null +++ b/drivers/s390/char/hmcdrv_mod.c @@ -0,0 +1,64 @@ +/* + * HMC Drive DVD Module + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#define KMSG_COMPONENT "hmcdrv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/version.h> +#include <linux/stat.h> + +#include "hmcdrv_ftp.h" +#include "hmcdrv_dev.h" +#include "hmcdrv_cache.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Copyright 2013 IBM Corporation"); +MODULE_DESCRIPTION("HMC drive DVD access"); + +/* + * module parameter 'cachesize' + */ +static size_t hmcdrv_mod_cachesize = HMCDRV_CACHE_SIZE_DFLT; +module_param_named(cachesize, hmcdrv_mod_cachesize, ulong, S_IRUGO); + +/** + * hmcdrv_mod_init() - module init function + */ +static int __init hmcdrv_mod_init(void) +{ + int rc = hmcdrv_ftp_probe(); /* perform w/o cache */ + + if (rc) + return rc; + + rc = hmcdrv_cache_startup(hmcdrv_mod_cachesize); + + if (rc) + return rc; + + rc = hmcdrv_dev_init(); + + if (rc) + hmcdrv_cache_shutdown(); + + return rc; +} + +/** + * hmcdrv_mod_exit() - module exit function + */ +static void __exit hmcdrv_mod_exit(void) +{ + hmcdrv_dev_exit(); + hmcdrv_cache_shutdown(); +} + +module_init(hmcdrv_mod_init); +module_exit(hmcdrv_mod_exit); diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index a68b5ec7d042..a88069f8c677 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -19,6 +19,7 @@ #define EVTYP_OPCMD 0x01 #define EVTYP_MSG 0x02 +#define EVTYP_DIAG_TEST 0x07 #define EVTYP_STATECHANGE 0x08 #define EVTYP_PMSGCMD 0x09 #define EVTYP_CNTLPROGOPCMD 0x20 @@ -32,6 +33,7 @@ #define EVTYP_OPCMD_MASK 0x80000000 #define EVTYP_MSG_MASK 0x40000000 +#define EVTYP_DIAG_TEST_MASK 0x02000000 #define EVTYP_STATECHANGE_MASK 0x01000000 #define EVTYP_PMSGCMD_MASK 0x00800000 #define EVTYP_CTLPROGOPCMD_MASK 0x00000001 diff --git a/drivers/s390/char/sclp_diag.h b/drivers/s390/char/sclp_diag.h new file mode 100644 index 000000000000..59c4afa5e670 --- /dev/null +++ b/drivers/s390/char/sclp_diag.h @@ -0,0 +1,89 @@ +/* + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#ifndef _SCLP_DIAG_H +#define _SCLP_DIAG_H + +#include <linux/types.h> + +/* return codes for Diagnostic Test FTP Service, as indicated in member + * sclp_diag_ftp::ldflg + */ +#define SCLP_DIAG_FTP_OK 0x80U /* success */ +#define SCLP_DIAG_FTP_LDFAIL 0x01U /* load failed */ +#define SCLP_DIAG_FTP_LDNPERM 0x02U /* not allowed */ +#define SCLP_DIAG_FTP_LDRUNS 0x03U /* LD runs */ +#define SCLP_DIAG_FTP_LDNRUNS 0x04U /* LD does not run */ + +#define SCLP_DIAG_FTP_XPCX 0x80 /* PCX communication code */ +#define SCLP_DIAG_FTP_ROUTE 4 /* routing code for new FTP service */ + +/* + * length of Diagnostic Test FTP Service event buffer + */ +#define SCLP_DIAG_FTP_EVBUF_LEN \ + (offsetof(struct sclp_diag_evbuf, mdd) + \ + sizeof(struct sclp_diag_ftp)) + +/** + * struct sclp_diag_ftp - Diagnostic Test FTP Service model-dependent data + * @pcx: code for PCX communication (should be 0x80) + * @ldflg: load flag (see defines above) + * @cmd: FTP command + * @pgsize: page size (0 = 4kB, 1 = large page size) + * @srcflg: source flag + * @spare: reserved (zeroes) + * @offset: file offset + * @fsize: file size + * @length: buffer size resp. bytes transferred + * @failaddr: failing address + * @bufaddr: buffer address, virtual + * @asce: region or segment table designation + * @fident: file name (ASCII, zero-terminated) + */ +struct sclp_diag_ftp { + u8 pcx; + u8 ldflg; + u8 cmd; + u8 pgsize; + u8 srcflg; + u8 spare; + u64 offset; + u64 fsize; + u64 length; + u64 failaddr; + u64 bufaddr; + u64 asce; + + u8 fident[256]; +} __packed; + +/** + * struct sclp_diag_evbuf - Diagnostic Test (ET7) Event Buffer + * @hdr: event buffer header + * @route: diagnostic route + * @mdd: model-dependent data (@route dependent) + */ +struct sclp_diag_evbuf { + struct evbuf_header hdr; + u16 route; + + union { + struct sclp_diag_ftp ftp; + } mdd; +} __packed; + +/** + * struct sclp_diag_sccb - Diagnostic Test (ET7) SCCB + * @hdr: SCCB header + * @evbuf: event buffer + */ +struct sclp_diag_sccb { + + struct sccb_header hdr; + struct sclp_diag_evbuf evbuf; +} __packed; + +#endif /* _SCLP_DIAG_H */ diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 1918d9dff45d..5bd6cb145a87 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -281,7 +281,7 @@ out: static unsigned int __init sclp_con_check_linemode(struct init_sccb *sccb) { - if (!(sccb->sclp_send_mask & (EVTYP_OPCMD_MASK | EVTYP_PMSGCMD_MASK))) + if (!(sccb->sclp_send_mask & EVTYP_OPCMD_MASK)) return 0; if (!(sccb->sclp_receive_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK))) return 0; diff --git a/drivers/s390/char/sclp_ftp.c b/drivers/s390/char/sclp_ftp.c new file mode 100644 index 000000000000..6561cc5b2d5d --- /dev/null +++ b/drivers/s390/char/sclp_ftp.c @@ -0,0 +1,275 @@ +/* + * SCLP Event Type (ET) 7 - Diagnostic Test FTP Services, useable on LPAR + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + * + */ + +#define KMSG_COMPONENT "hmcdrv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/wait.h> +#include <linux/string.h> +#include <linux/jiffies.h> +#include <asm/sysinfo.h> +#include <asm/ebcdic.h> + +#include "sclp.h" +#include "sclp_diag.h" +#include "sclp_ftp.h" + +static DECLARE_COMPLETION(sclp_ftp_rx_complete); +static u8 sclp_ftp_ldflg; +static u64 sclp_ftp_fsize; +static u64 sclp_ftp_length; + +/** + * sclp_ftp_txcb() - Diagnostic Test FTP services SCLP command callback + */ +static void sclp_ftp_txcb(struct sclp_req *req, void *data) +{ + struct completion *completion = data; + +#ifdef DEBUG + pr_debug("SCLP (ET7) TX-IRQ, SCCB @ 0x%p: %*phN\n", + req->sccb, 24, req->sccb); +#endif + complete(completion); +} + +/** + * sclp_ftp_rxcb() - Diagnostic Test FTP services receiver event callback + */ +static void sclp_ftp_rxcb(struct evbuf_header *evbuf) +{ + struct sclp_diag_evbuf *diag = (struct sclp_diag_evbuf *) evbuf; + + /* + * Check for Diagnostic Test FTP Service + */ + if (evbuf->type != EVTYP_DIAG_TEST || + diag->route != SCLP_DIAG_FTP_ROUTE || + diag->mdd.ftp.pcx != SCLP_DIAG_FTP_XPCX || + evbuf->length < SCLP_DIAG_FTP_EVBUF_LEN) + return; + +#ifdef DEBUG + pr_debug("SCLP (ET7) RX-IRQ, Event @ 0x%p: %*phN\n", + evbuf, 24, evbuf); +#endif + + /* + * Because the event buffer is located in a page which is owned + * by the SCLP core, all data of interest must be copied. The + * error indication is in 'sclp_ftp_ldflg' + */ + sclp_ftp_ldflg = diag->mdd.ftp.ldflg; + sclp_ftp_fsize = diag->mdd.ftp.fsize; + sclp_ftp_length = diag->mdd.ftp.length; + + complete(&sclp_ftp_rx_complete); +} + +/** + * sclp_ftp_et7() - start a Diagnostic Test FTP Service SCLP request + * @ftp: pointer to FTP descriptor + * + * Return: 0 on success, else a (negative) error code + */ +static int sclp_ftp_et7(const struct hmcdrv_ftp_cmdspec *ftp) +{ + struct completion completion; + struct sclp_diag_sccb *sccb; + struct sclp_req *req; + size_t len; + int rc; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + sccb = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!req || !sccb) { + rc = -ENOMEM; + goto out_free; + } + + sccb->hdr.length = SCLP_DIAG_FTP_EVBUF_LEN + + sizeof(struct sccb_header); + sccb->evbuf.hdr.type = EVTYP_DIAG_TEST; + sccb->evbuf.hdr.length = SCLP_DIAG_FTP_EVBUF_LEN; + sccb->evbuf.hdr.flags = 0; /* clear processed-buffer */ + sccb->evbuf.route = SCLP_DIAG_FTP_ROUTE; + sccb->evbuf.mdd.ftp.pcx = SCLP_DIAG_FTP_XPCX; + sccb->evbuf.mdd.ftp.srcflg = 0; + sccb->evbuf.mdd.ftp.pgsize = 0; + sccb->evbuf.mdd.ftp.asce = _ASCE_REAL_SPACE; + sccb->evbuf.mdd.ftp.ldflg = SCLP_DIAG_FTP_LDFAIL; + sccb->evbuf.mdd.ftp.fsize = 0; + sccb->evbuf.mdd.ftp.cmd = ftp->id; + sccb->evbuf.mdd.ftp.offset = ftp->ofs; + sccb->evbuf.mdd.ftp.length = ftp->len; + sccb->evbuf.mdd.ftp.bufaddr = virt_to_phys(ftp->buf); + + len = strlcpy(sccb->evbuf.mdd.ftp.fident, ftp->fname, + HMCDRV_FTP_FIDENT_MAX); + if (len >= HMCDRV_FTP_FIDENT_MAX) { + rc = -EINVAL; + goto out_free; + } + + req->command = SCLP_CMDW_WRITE_EVENT_DATA; + req->sccb = sccb; + req->status = SCLP_REQ_FILLED; + req->callback = sclp_ftp_txcb; + req->callback_data = &completion; + + init_completion(&completion); + + rc = sclp_add_request(req); + if (rc) + goto out_free; + + /* Wait for end of ftp sclp command. */ + wait_for_completion(&completion); + +#ifdef DEBUG + pr_debug("status of SCLP (ET7) request is 0x%04x (0x%02x)\n", + sccb->hdr.response_code, sccb->evbuf.hdr.flags); +#endif + + /* + * Check if sclp accepted the request. The data transfer runs + * asynchronously and the completion is indicated with an + * sclp ET7 event. + */ + if (req->status != SCLP_REQ_DONE || + (sccb->evbuf.hdr.flags & 0x80) == 0 || /* processed-buffer */ + (sccb->hdr.response_code & 0xffU) != 0x20U) { + rc = -EIO; + } + +out_free: + free_page((unsigned long) sccb); + kfree(req); + return rc; +} + +/** + * sclp_ftp_cmd() - executes a HMC related SCLP Diagnose (ET7) FTP command + * @ftp: pointer to FTP command specification + * @fsize: return of file size (or NULL if undesirable) + * + * Attention: Notice that this function is not reentrant - so the caller + * must ensure locking. + * + * Return: number of bytes read/written or a (negative) error code + */ +ssize_t sclp_ftp_cmd(const struct hmcdrv_ftp_cmdspec *ftp, size_t *fsize) +{ + ssize_t len; +#ifdef DEBUG + unsigned long start_jiffies; + + pr_debug("starting SCLP (ET7), cmd %d for '%s' at %lld with %zd bytes\n", + ftp->id, ftp->fname, (long long) ftp->ofs, ftp->len); + start_jiffies = jiffies; +#endif + + init_completion(&sclp_ftp_rx_complete); + + /* Start ftp sclp command. */ + len = sclp_ftp_et7(ftp); + if (len) + goto out_unlock; + + /* + * There is no way to cancel the sclp ET7 request, the code + * needs to wait unconditionally until the transfer is complete. + */ + wait_for_completion(&sclp_ftp_rx_complete); + +#ifdef DEBUG + pr_debug("completed SCLP (ET7) request after %lu ms (all)\n", + (jiffies - start_jiffies) * 1000 / HZ); + pr_debug("return code of SCLP (ET7) FTP Service is 0x%02x, with %lld/%lld bytes\n", + sclp_ftp_ldflg, sclp_ftp_length, sclp_ftp_fsize); +#endif + + switch (sclp_ftp_ldflg) { + case SCLP_DIAG_FTP_OK: + len = sclp_ftp_length; + if (fsize) + *fsize = sclp_ftp_fsize; + break; + case SCLP_DIAG_FTP_LDNPERM: + len = -EPERM; + break; + case SCLP_DIAG_FTP_LDRUNS: + len = -EBUSY; + break; + case SCLP_DIAG_FTP_LDFAIL: + len = -ENOENT; + break; + default: + len = -EIO; + break; + } + +out_unlock: + return len; +} + +/* + * ET7 event listener + */ +static struct sclp_register sclp_ftp_event = { + .send_mask = EVTYP_DIAG_TEST_MASK, /* want tx events */ + .receive_mask = EVTYP_DIAG_TEST_MASK, /* want rx events */ + .receiver_fn = sclp_ftp_rxcb, /* async callback (rx) */ + .state_change_fn = NULL, + .pm_event_fn = NULL, +}; + +/** + * sclp_ftp_startup() - startup of FTP services, when running on LPAR + */ +int sclp_ftp_startup(void) +{ +#ifdef DEBUG + unsigned long info; +#endif + int rc; + + rc = sclp_register(&sclp_ftp_event); + if (rc) + return rc; + +#ifdef DEBUG + info = get_zeroed_page(GFP_KERNEL); + + if (info != 0) { + struct sysinfo_2_2_2 *info222 = (struct sysinfo_2_2_2 *)info; + + if (!stsi(info222, 2, 2, 2)) { /* get SYSIB 2.2.2 */ + info222->name[sizeof(info222->name) - 1] = '\0'; + EBCASC_500(info222->name, sizeof(info222->name) - 1); + pr_debug("SCLP (ET7) FTP Service working on LPAR %u (%s)\n", + info222->lpar_number, info222->name); + } + + free_page(info); + } +#endif /* DEBUG */ + return 0; +} + +/** + * sclp_ftp_shutdown() - shutdown of FTP services, when running on LPAR + */ +void sclp_ftp_shutdown(void) +{ + sclp_unregister(&sclp_ftp_event); +} diff --git a/drivers/s390/char/sclp_ftp.h b/drivers/s390/char/sclp_ftp.h new file mode 100644 index 000000000000..98ba3183e7d9 --- /dev/null +++ b/drivers/s390/char/sclp_ftp.h @@ -0,0 +1,21 @@ +/* + * SCLP Event Type (ET) 7 - Diagnostic Test FTP Services, useable on LPAR + * + * Notice that all functions exported here are not reentrant. + * So usage should be exclusive, ensured by the caller (e.g. using a + * mutex). + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#ifndef __SCLP_FTP_H__ +#define __SCLP_FTP_H__ + +#include "hmcdrv_ftp.h" + +int sclp_ftp_startup(void); +void sclp_ftp_shutdown(void); +ssize_t sclp_ftp_cmd(const struct hmcdrv_ftp_cmdspec *ftp, size_t *fsize); + +#endif /* __SCLP_FTP_H__ */ diff --git a/drivers/s390/char/sclp_rw.c b/drivers/s390/char/sclp_rw.c index 3b13d58fe87b..35a84af875ee 100644 --- a/drivers/s390/char/sclp_rw.c +++ b/drivers/s390/char/sclp_rw.c @@ -33,7 +33,7 @@ static void sclp_rw_pm_event(struct sclp_register *reg, /* Event type structure for write message and write priority message */ static struct sclp_register sclp_rw_event = { - .send_mask = EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK, + .send_mask = EVTYP_MSG_MASK, .pm_event_fn = sclp_rw_pm_event, }; @@ -456,14 +456,9 @@ sclp_emit_buffer(struct sclp_buffer *buffer, return -EIO; sccb = buffer->sccb; - if (sclp_rw_event.sclp_receive_mask & EVTYP_MSG_MASK) - /* Use normal write message */ - sccb->msg_buf.header.type = EVTYP_MSG; - else if (sclp_rw_event.sclp_receive_mask & EVTYP_PMSGCMD_MASK) - /* Use write priority message */ - sccb->msg_buf.header.type = EVTYP_PMSGCMD; - else - return -EOPNOTSUPP; + /* Use normal write message */ + sccb->msg_buf.header.type = EVTYP_MSG; + buffer->request.command = SCLP_CMDW_WRITE_EVENT_DATA; buffer->request.status = SCLP_REQ_FILLED; buffer->request.callback = sclp_writedata_callback; diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c index b9a9f721716d..ae67386c03d3 100644 --- a/drivers/s390/char/sclp_vt220.c +++ b/drivers/s390/char/sclp_vt220.c @@ -206,10 +206,6 @@ sclp_vt220_callback(struct sclp_req *request, void *data) static int __sclp_vt220_emit(struct sclp_vt220_request *request) { - if (!(sclp_vt220_register.sclp_receive_mask & EVTYP_VT220MSG_MASK)) { - request->sclp_req.status = SCLP_REQ_FAILED; - return -EIO; - } request->sclp_req.command = SCLP_CMDW_WRITE_EVENT_DATA; request->sclp_req.status = SCLP_REQ_FILLED; request->sclp_req.callback = sclp_vt220_callback; diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c index 6dc60725de92..77f9b9c2f701 100644 --- a/drivers/s390/char/tape_char.c +++ b/drivers/s390/char/tape_char.c @@ -402,7 +402,9 @@ __tapechar_ioctl(struct tape_device *device, memset(&get, 0, sizeof(get)); get.mt_type = MT_ISUNKNOWN; get.mt_resid = 0 /* device->devstat.rescnt */; - get.mt_dsreg = device->tape_state; + get.mt_dsreg = + ((device->char_data.block_size << MT_ST_BLKSIZE_SHIFT) + & MT_ST_BLKSIZE_MASK); /* FIXME: mt_gstat, mt_erreg, mt_fileno */ get.mt_gstat = 0; get.mt_erreg = 0; diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 1884653e4472..efcf48481c5f 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -28,6 +28,7 @@ #include <asm/processor.h> #include <asm/irqflags.h> #include <asm/checksum.h> +#include <asm/switch_to.h> #include "sclp.h" #define TRACE(x...) debug_sprintf_event(zcore_dbf, 1, x) @@ -149,18 +150,21 @@ static int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count) static int __init init_cpu_info(enum arch_id arch) { - struct save_area *sa; + struct save_area_ext *sa_ext; /* get info for boot cpu from lowcore, stored in the HSA */ - sa = dump_save_area_create(0); - if (!sa) + sa_ext = dump_save_area_create(0); + if (!sa_ext) return -ENOMEM; - if (memcpy_hsa_kernel(sa, sys_info.sa_base, sys_info.sa_size) < 0) { + if (memcpy_hsa_kernel(&sa_ext->sa, sys_info.sa_base, + sys_info.sa_size) < 0) { TRACE("could not copy from HSA\n"); - kfree(sa); + kfree(sa_ext); return -EIO; } + if (MACHINE_HAS_VX) + save_vx_regs_safe(sa_ext->vx_regs); return 0; } @@ -258,7 +262,7 @@ static int zcore_add_lc(char __user *buf, unsigned long start, size_t count) unsigned long sa_start, sa_end; /* save area range */ unsigned long prefix; unsigned long sa_off, len, buf_off; - struct save_area *save_area = dump_save_areas.areas[i]; + struct save_area *save_area = &dump_save_areas.areas[i]->sa; prefix = save_area->pref_reg; sa_start = prefix + sys_info.sa_base; @@ -612,7 +616,7 @@ static void __init zcore_header_init(int arch, struct zcore_header *hdr, hdr->tod = get_tod_clock(); get_cpu_id(&hdr->cpu_id); for (i = 0; i < dump_save_areas.count; i++) { - prefix = dump_save_areas.areas[i]->pref_reg; + prefix = dump_save_areas.areas[i]->sa.pref_reg; hdr->real_cpu_cnt++; if (!prefix) continue; diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index 00bfbee0af9e..56eb4ee4deba 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -87,7 +87,7 @@ static irqreturn_t do_airq_interrupt(int irq, void *dummy) struct airq_struct *airq; struct hlist_head *head; - __this_cpu_write(s390_idle.nohz_delay, 1); + set_cpu_flag(CIF_NOHZ_DELAY); tpi_info = (struct tpi_info *) &get_irq_regs()->int_code; head = &airq_lists[tpi_info->isc]; rcu_read_lock(); diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 2905d8b0ec95..d5a6f287d2fe 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -561,7 +561,7 @@ static irqreturn_t do_cio_interrupt(int irq, void *dummy) struct subchannel *sch; struct irb *irb; - __this_cpu_write(s390_idle.nohz_delay, 1); + set_cpu_flag(CIF_NOHZ_DELAY); tpi_info = (struct tpi_info *) &get_irq_regs()->int_code; irb = &__get_cpu_var(cio_irb); sch = (struct subchannel *)(unsigned long) tpi_info->intparm; diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 4038437ff033..99485415dcc2 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -664,6 +664,17 @@ static ssize_t ap_hwtype_show(struct device *dev, } static DEVICE_ATTR(hwtype, 0444, ap_hwtype_show, NULL); + +static ssize_t ap_raw_hwtype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->raw_hwtype); +} + +static DEVICE_ATTR(raw_hwtype, 0444, ap_raw_hwtype_show, NULL); + static ssize_t ap_depth_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -734,6 +745,7 @@ static DEVICE_ATTR(ap_functions, 0444, ap_functions_show, NULL); static struct attribute *ap_dev_attrs[] = { &dev_attr_hwtype.attr, + &dev_attr_raw_hwtype.attr, &dev_attr_depth.attr, &dev_attr_request_count.attr, &dev_attr_requestq_count.attr, @@ -1188,6 +1200,10 @@ static int ap_select_domain(void) ap_qid_t qid; int rc, i, j; + /* IF APXA isn't installed, only 16 domains could be defined */ + if (!ap_configuration->ap_extended && (ap_domain_index > 15)) + return -EINVAL; + /* * We want to use a single domain. Either the one specified with * the "domain=" parameter or the domain with the maximum number @@ -1413,9 +1429,13 @@ static void ap_scan_bus(struct work_struct *unused) continue; } break; + case 11: + ap_dev->device_type = 10; + break; default: ap_dev->device_type = device_type; } + ap_dev->raw_hwtype = device_type; rc = ap_query_functions(qid, &device_functions); if (!rc) @@ -1900,9 +1920,15 @@ static void ap_reset_all(void) { int i, j; - for (i = 0; i < AP_DOMAINS; i++) - for (j = 0; j < AP_DEVICES; j++) + for (i = 0; i < AP_DOMAINS; i++) { + if (!ap_test_config_domain(i)) + continue; + for (j = 0; j < AP_DEVICES; j++) { + if (!ap_test_config_card_id(j)) + continue; ap_reset_queue(AP_MKQID(j, i)); + } + } } static struct reset_call ap_reset_call = { diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 6405ae24a7a6..055a0f956d17 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -31,7 +31,7 @@ #include <linux/types.h> #define AP_DEVICES 64 /* Number of AP devices. */ -#define AP_DOMAINS 16 /* Number of AP domains. */ +#define AP_DOMAINS 256 /* Number of AP domains. */ #define AP_MAX_RESET 90 /* Maximum number of resets. */ #define AP_RESET_TIMEOUT (HZ*0.7) /* Time in ticks for reset timeouts. */ #define AP_CONFIG_TIME 30 /* Time in seconds between AP bus rescans. */ @@ -45,9 +45,9 @@ extern int ap_domain_index; */ typedef unsigned int ap_qid_t; -#define AP_MKQID(_device,_queue) (((_device) & 63) << 8 | ((_queue) & 15)) +#define AP_MKQID(_device, _queue) (((_device) & 63) << 8 | ((_queue) & 255)) #define AP_QID_DEVICE(_qid) (((_qid) >> 8) & 63) -#define AP_QID_QUEUE(_qid) ((_qid) & 15) +#define AP_QID_QUEUE(_qid) ((_qid) & 255) /** * structy ap_queue_status - Holds the AP queue status. @@ -161,6 +161,7 @@ struct ap_device { ap_qid_t qid; /* AP queue id. */ int queue_depth; /* AP queue depth.*/ int device_type; /* AP device type. */ + int raw_hwtype; /* AP raw hardware type. */ unsigned int functions; /* AP device function bitfield. */ int unregistered; /* marks AP device as unregistered */ struct timer_list timeout; /* Timer for request timeouts. */ diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 0e18c5dcd91f..08f1830cbfc4 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -343,10 +343,11 @@ struct zcrypt_ops *__ops_lookup(unsigned char *name, int variant) break; } } + if (!found || !try_module_get(zops->owner)) + zops = NULL; + spin_unlock_bh(&zcrypt_ops_list_lock); - if (!found) - return NULL; return zops; } @@ -359,8 +360,6 @@ struct zcrypt_ops *zcrypt_msgtype_request(unsigned char *name, int variant) request_module("%s", name); zops = __ops_lookup(name, variant); } - if ((!zops) || (!try_module_get(zops->owner))) - return NULL; return zops; } EXPORT_SYMBOL(zcrypt_msgtype_request); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0aa51b451597..24f97bf74266 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2878,6 +2878,7 @@ #define PCI_DEVICE_ID_INTEL_82372FB_1 0x7601 #define PCI_DEVICE_ID_INTEL_SCH_LPC 0x8119 #define PCI_DEVICE_ID_INTEL_SCH_IDE 0x811a +#define PCI_DEVICE_ID_INTEL_E6XX_CU 0x8183 #define PCI_DEVICE_ID_INTEL_ITC_LPC 0x8186 #define PCI_DEVICE_ID_INTEL_82454GX 0x84c4 #define PCI_DEVICE_ID_INTEL_82450GX 0x84c5 diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 5d586a45a319..a19ddacdac30 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -27,19 +27,23 @@ struct seccomp { struct seccomp_filter *filter; }; -extern int __secure_computing(int); -static inline int secure_computing(int this_syscall) +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER +extern int __secure_computing(void); +static inline int secure_computing(void) { if (unlikely(test_thread_flag(TIF_SECCOMP))) - return __secure_computing(this_syscall); + return __secure_computing(); return 0; } -/* A wrapper for architectures supporting only SECCOMP_MODE_STRICT. */ -static inline void secure_computing_strict(int this_syscall) -{ - BUG_ON(secure_computing(this_syscall) != 0); -} +#define SECCOMP_PHASE1_OK 0 +#define SECCOMP_PHASE1_SKIP 1 + +extern u32 seccomp_phase1(struct seccomp_data *sd); +int seccomp_phase2(u32 phase1_result); +#else +extern void secure_computing_strict(int this_syscall); +#endif extern long prctl_get_seccomp(void); extern long prctl_set_seccomp(unsigned long, char __user *); @@ -56,8 +60,11 @@ static inline int seccomp_mode(struct seccomp *s) struct seccomp { }; struct seccomp_filter { }; -static inline int secure_computing(int this_syscall) { return 0; } +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER +static inline int secure_computing(void) { return 0; } +#else static inline void secure_computing_strict(int this_syscall) { return; } +#endif static inline long prctl_get_seccomp(void) { diff --git a/include/linux/tick.h b/include/linux/tick.h index 595ee86f5e0d..eda850ca757a 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -108,7 +108,7 @@ extern struct tick_sched *tick_get_tick_sched(int cpu); extern void tick_irq_enter(void); extern int tick_oneshot_mode_active(void); # ifndef arch_needs_cpu -# define arch_needs_cpu(cpu) (0) +# define arch_needs_cpu() (0) # endif # else static inline void tick_clock_notify(void) { } diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index ef6103bf1f9b..ea9bf2561b9e 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -391,6 +391,8 @@ typedef struct elf64_shdr { #define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */ #define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */ #define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ +#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ +#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 84922befea84..4ef9687ac115 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -21,10 +21,11 @@ #include <linux/slab.h> #include <linux/syscalls.h> -/* #define SECCOMP_DEBUG 1 */ +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER +#include <asm/syscall.h> +#endif #ifdef CONFIG_SECCOMP_FILTER -#include <asm/syscall.h> #include <linux/filter.h> #include <linux/pid.h> #include <linux/ptrace.h> @@ -172,10 +173,10 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) * * Returns valid seccomp BPF response codes. */ -static u32 seccomp_run_filters(int syscall) +static u32 seccomp_run_filters(struct seccomp_data *sd) { struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); - struct seccomp_data sd; + struct seccomp_data sd_local; u32 ret = SECCOMP_RET_ALLOW; /* Ensure unexpected behavior doesn't result in failing open. */ @@ -185,14 +186,17 @@ static u32 seccomp_run_filters(int syscall) /* Make sure cross-thread synced filter points somewhere sane. */ smp_read_barrier_depends(); - populate_seccomp_data(&sd); + if (!sd) { + populate_seccomp_data(&sd_local); + sd = &sd_local; + } /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ for (; f; f = f->prev) { - u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd); + u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd); if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; @@ -563,11 +567,55 @@ static int mode1_syscalls_32[] = { }; #endif -int __secure_computing(int this_syscall) +static void __secure_computing_strict(int this_syscall) +{ + int *syscall_whitelist = mode1_syscalls; +#ifdef CONFIG_COMPAT + if (is_compat_task()) + syscall_whitelist = mode1_syscalls_32; +#endif + do { + if (*syscall_whitelist == this_syscall) + return; + } while (*++syscall_whitelist); + +#ifdef SECCOMP_DEBUG + dump_stack(); +#endif + audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL); + do_exit(SIGKILL); +} + +#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER +void secure_computing_strict(int this_syscall) +{ + int mode = current->seccomp.mode; + + if (mode == 0) + return; + else if (mode == SECCOMP_MODE_STRICT) + __secure_computing_strict(this_syscall); + else + BUG(); +} +#else +int __secure_computing(void) { - int exit_sig = 0; - int *syscall; - u32 ret; + u32 phase1_result = seccomp_phase1(NULL); + + if (likely(phase1_result == SECCOMP_PHASE1_OK)) + return 0; + else if (likely(phase1_result == SECCOMP_PHASE1_SKIP)) + return -1; + else + return seccomp_phase2(phase1_result); +} + +#ifdef CONFIG_SECCOMP_FILTER +static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) +{ + u32 filter_ret, action; + int data; /* * Make sure that any changes to mode from another thread have @@ -575,85 +623,127 @@ int __secure_computing(int this_syscall) */ rmb(); - switch (current->seccomp.mode) { - case SECCOMP_MODE_STRICT: - syscall = mode1_syscalls; -#ifdef CONFIG_COMPAT - if (is_compat_task()) - syscall = mode1_syscalls_32; + filter_ret = seccomp_run_filters(sd); + data = filter_ret & SECCOMP_RET_DATA; + action = filter_ret & SECCOMP_RET_ACTION; + + switch (action) { + case SECCOMP_RET_ERRNO: + /* Set the low-order 16-bits as a errno. */ + syscall_set_return_value(current, task_pt_regs(current), + -data, 0); + goto skip; + + case SECCOMP_RET_TRAP: + /* Show the handler the original registers. */ + syscall_rollback(current, task_pt_regs(current)); + /* Let the filter pass back 16 bits of data. */ + seccomp_send_sigsys(this_syscall, data); + goto skip; + + case SECCOMP_RET_TRACE: + return filter_ret; /* Save the rest for phase 2. */ + + case SECCOMP_RET_ALLOW: + return SECCOMP_PHASE1_OK; + + case SECCOMP_RET_KILL: + default: + audit_seccomp(this_syscall, SIGSYS, action); + do_exit(SIGSYS); + } + + unreachable(); + +skip: + audit_seccomp(this_syscall, 0, action); + return SECCOMP_PHASE1_SKIP; +} #endif - do { - if (*syscall == this_syscall) - return 0; - } while (*++syscall); - exit_sig = SIGKILL; - ret = SECCOMP_RET_KILL; - break; + +/** + * seccomp_phase1() - run fast path seccomp checks on the current syscall + * @arg sd: The seccomp_data or NULL + * + * This only reads pt_regs via the syscall_xyz helpers. The only change + * it will make to pt_regs is via syscall_set_return_value, and it will + * only do that if it returns SECCOMP_PHASE1_SKIP. + * + * If sd is provided, it will not read pt_regs at all. + * + * It may also call do_exit or force a signal; these actions must be + * safe. + * + * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should + * be processed normally. + * + * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be + * invoked. In this case, seccomp_phase1 will have set the return value + * using syscall_set_return_value. + * + * If it returns anything else, then the return value should be passed + * to seccomp_phase2 from a context in which ptrace hooks are safe. + */ +u32 seccomp_phase1(struct seccomp_data *sd) +{ + int mode = current->seccomp.mode; + int this_syscall = sd ? sd->nr : + syscall_get_nr(current, task_pt_regs(current)); + + switch (mode) { + case SECCOMP_MODE_STRICT: + __secure_computing_strict(this_syscall); /* may call do_exit */ + return SECCOMP_PHASE1_OK; #ifdef CONFIG_SECCOMP_FILTER - case SECCOMP_MODE_FILTER: { - int data; - struct pt_regs *regs = task_pt_regs(current); - ret = seccomp_run_filters(this_syscall); - data = ret & SECCOMP_RET_DATA; - ret &= SECCOMP_RET_ACTION; - switch (ret) { - case SECCOMP_RET_ERRNO: - /* Set the low-order 16-bits as a errno. */ - syscall_set_return_value(current, regs, - -data, 0); - goto skip; - case SECCOMP_RET_TRAP: - /* Show the handler the original registers. */ - syscall_rollback(current, regs); - /* Let the filter pass back 16 bits of data. */ - seccomp_send_sigsys(this_syscall, data); - goto skip; - case SECCOMP_RET_TRACE: - /* Skip these calls if there is no tracer. */ - if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { - syscall_set_return_value(current, regs, - -ENOSYS, 0); - goto skip; - } - /* Allow the BPF to provide the event message */ - ptrace_event(PTRACE_EVENT_SECCOMP, data); - /* - * The delivery of a fatal signal during event - * notification may silently skip tracer notification. - * Terminating the task now avoids executing a system - * call that may not be intended. - */ - if (fatal_signal_pending(current)) - break; - if (syscall_get_nr(current, regs) < 0) - goto skip; /* Explicit request to skip. */ - - return 0; - case SECCOMP_RET_ALLOW: - return 0; - case SECCOMP_RET_KILL: - default: - break; - } - exit_sig = SIGSYS; - break; - } + case SECCOMP_MODE_FILTER: + return __seccomp_phase1_filter(this_syscall, sd); #endif default: BUG(); } +} -#ifdef SECCOMP_DEBUG - dump_stack(); -#endif - audit_seccomp(this_syscall, exit_sig, ret); - do_exit(exit_sig); -#ifdef CONFIG_SECCOMP_FILTER -skip: - audit_seccomp(this_syscall, exit_sig, ret); -#endif - return -1; +/** + * seccomp_phase2() - finish slow path seccomp work for the current syscall + * @phase1_result: The return value from seccomp_phase1() + * + * This must be called from a context in which ptrace hooks can be used. + * + * Returns 0 if the syscall should be processed or -1 to skip the syscall. + */ +int seccomp_phase2(u32 phase1_result) +{ + struct pt_regs *regs = task_pt_regs(current); + u32 action = phase1_result & SECCOMP_RET_ACTION; + int data = phase1_result & SECCOMP_RET_DATA; + + BUG_ON(action != SECCOMP_RET_TRACE); + + audit_seccomp(syscall_get_nr(current, regs), 0, action); + + /* Skip these calls if there is no tracer. */ + if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { + syscall_set_return_value(current, regs, + -ENOSYS, 0); + return -1; + } + + /* Allow the BPF to provide the event message */ + ptrace_event(PTRACE_EVENT_SECCOMP, data); + /* + * The delivery of a fatal signal during event + * notification may silently skip tracer notification. + * Terminating the task now avoids executing a system + * call that may not be intended. + */ + if (fatal_signal_pending(current)) + do_exit(SIGSYS); + if (syscall_get_nr(current, regs) < 0) + return -1; /* Explicit request to skip. */ + + return 0; } +#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ long prctl_get_seccomp(void) { diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7c1412ea2d29..a73efdf6f696 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -586,7 +586,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, } while (read_seqretry(&jiffies_lock, seq)); if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || - arch_needs_cpu(cpu) || irq_work_needs_cpu()) { + arch_needs_cpu() || irq_work_needs_cpu()) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; } else { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c9710c9bbee2..736d8e1b6381 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4971,6 +4971,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, pgdat->node_start_pfn = node_start_pfn; #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid, + (u64) start_pfn << PAGE_SHIFT, (u64) (end_pfn << PAGE_SHIFT) - 1); #endif calculate_node_totalpages(pgdat, start_pfn, end_pfn, zones_size, zholes_size); diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 650ecc83d7d7..001facfa5b74 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -388,10 +388,6 @@ do_file(char const *const fname) "unrecognized ET_REL file: %s\n", fname); fail_file(); } - if (w2(ehdr->e_machine) == EM_S390) { - reltype = R_390_32; - mcount_adjust_32 = -4; - } if (w2(ehdr->e_machine) == EM_MIPS) { reltype = R_MIPS_32; is_fake_mcount32 = MIPS32_is_fake_mcount; diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 397b6b84e8c5..d4b665610d67 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -241,13 +241,6 @@ if ($arch eq "x86_64") { $objcopy .= " -O elf32-i386"; $cc .= " -m32"; -} elsif ($arch eq "s390" && $bits == 32) { - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_32\\s+_mcount\$"; - $mcount_adjust = -4; - $alignment = 4; - $ld .= " -m elf_s390"; - $cc .= " -m31"; - } elsif ($arch eq "s390" && $bits == 64) { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$"; $mcount_adjust = -8; |