diff options
Diffstat (limited to 'arch/s390')
122 files changed, 2189 insertions, 860 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index e256592eb66e..eae2c64cf69d 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -1,7 +1,7 @@ obj-y += kernel/ obj-y += mm/ obj-$(CONFIG_KVM) += kvm/ -obj-$(CONFIG_CRYPTO_HW) += crypto/ +obj-y += crypto/ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-y += net/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a2dcef0aacc7..e161fafb495b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -105,6 +105,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF @@ -123,8 +124,6 @@ config S390 select GENERIC_TIME_VSYSCALL select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL - select HAVE_ARCH_EARLY_PFN_TO_NID - select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_JUMP_LABEL select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER @@ -507,6 +506,21 @@ source kernel/Kconfig.preempt source kernel/Kconfig.hz +config ARCH_RANDOM + def_bool y + prompt "s390 architectural random number generation API" + help + Enable the s390 architectural random number generation API + to provide random data for all consumers within the Linux + kernel. + + When enabled the arch_random_* functions declared in linux/random.h + are implemented. The implementation is based on the s390 CPACF + instruction subfunction TRNG which provides a real true random + number generator. + + If unsure, say Y. + endmenu menu "Memory setup" @@ -537,6 +551,16 @@ config FORCE_MAX_ZONEORDER source "mm/Kconfig" +config MAX_PHYSMEM_BITS + int "Maximum size of supported physical memory in bits (42-53)" + range 42 53 + default "46" + help + This option specifies the maximum supported size of physical memory + in bits. Supported is any size between 2^42 (4TB) and 2^53 (8PB). + Increasing the number of bits also increases the kernel image size. + By default 46 bits (64TB) are supported. + config PACK_STACK def_bool y prompt "Pack kernel stack" @@ -614,7 +638,7 @@ if PCI config PCI_NR_FUNCTIONS int "Maximum number of PCI functions (1-4096)" range 1 4096 - default "64" + default "128" help This allows you to specify the maximum number of PCI functions which this kernel will support. @@ -672,6 +696,16 @@ config EADM_SCH To compile this driver as a module, choose M here: the module will be called eadm_sch. +config VFIO_CCW + def_tristate n + prompt "Support for VFIO-CCW subchannels" + depends on S390_CCW_IOMMU && VFIO_MDEV + help + This driver allows usage of I/O subchannels via VFIO-CCW. + + To compile this driver as a module, choose M here: the + module will be called vfio_ccw. + endmenu menu "Dump support" diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index fa95041fa9f6..33ca29333e18 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -141,31 +141,34 @@ static void check_ipl_parmblock(void *start, unsigned long size) unsigned long decompress_kernel(void) { - unsigned long output_addr; - unsigned char *output; + void *output, *kernel_end; - output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL; - check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start); - memset(&_bss, 0, &_ebss - &_bss); - free_mem_ptr = (unsigned long)&_end; - free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; - output = (unsigned char *) output_addr; + output = (void *) ALIGN((unsigned long) &_end + HEAP_SIZE, PAGE_SIZE); + kernel_end = output + SZ__bss_start; + check_ipl_parmblock((void *) 0, (unsigned long) kernel_end); #ifdef CONFIG_BLK_DEV_INITRD /* * Move the initrd right behind the end of the decompressed - * kernel image. + * kernel image. This also prevents initrd corruption caused by + * bss clearing since kernel_end will always be located behind the + * current bss section.. */ - if (INITRD_START && INITRD_SIZE && - INITRD_START < (unsigned long) output + SZ__bss_start) { - check_ipl_parmblock(output + SZ__bss_start, - INITRD_START + INITRD_SIZE); - memmove(output + SZ__bss_start, - (void *) INITRD_START, INITRD_SIZE); - INITRD_START = (unsigned long) output + SZ__bss_start; + if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) { + check_ipl_parmblock(kernel_end, INITRD_SIZE); + memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE); + INITRD_START = (unsigned long) kernel_end; } #endif + /* + * Clear bss section. free_mem_ptr and free_mem_end_ptr need to be + * initialized afterwards since they reside in bss. + */ + memset(&_bss, 0, &_ebss - &_bss); + free_mem_ptr = (unsigned long) &_end; + free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; + puts("Uncompressing Linux... "); __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); puts("Ok, booting the kernel.\n"); diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 143b1e00b818..a5039fa89314 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -73,6 +73,7 @@ CONFIG_ZSWAP=y CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y +CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PCI=y CONFIG_PCI_DEBUG=y @@ -609,7 +610,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f05d2d6e1087..83970b5afb2b 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -72,6 +72,7 @@ CONFIG_ZSWAP=y CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y +CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PCI=y CONFIG_HOTPLUG_PCI=y @@ -560,7 +561,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 2358bf33c5ef..fbc6542aaf59 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -70,6 +70,7 @@ CONFIG_ZSWAP=y CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y +CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PCI=y CONFIG_HOTPLUG_PCI=y @@ -558,7 +559,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 4366a3e3e754..e23d97c13735 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -35,7 +35,6 @@ CONFIG_SCSI_ENCLOSURE=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_SRP_ATTRS=y CONFIG_ZFCP=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_INPUT_KEYBOARD is not set diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 402c530c6da5..678d9863e3f0 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -10,5 +10,6 @@ obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o paes_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o +obj-$(CONFIG_ARCH_RANDOM) += arch_random.o crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c new file mode 100644 index 000000000000..9317b3e645e2 --- /dev/null +++ b/arch/s390/crypto/arch_random.c @@ -0,0 +1,31 @@ +/* + * s390 arch random implementation. + * + * Copyright IBM Corp. 2017 + * Author(s): Harald Freudenberger <freude@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/atomic.h> +#include <linux/static_key.h> +#include <asm/cpacf.h> + +DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); + +atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0); +EXPORT_SYMBOL(s390_arch_random_counter); + +static int __init s390_arch_random_init(void) +{ + /* check if subfunction CPACF_PRNO_TRNG is available */ + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + static_branch_enable(&s390_arch_random_available); + + return 0; +} +arch_initcall(s390_arch_random_init); diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index d69ea495c4d7..a4e903ed7e21 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -474,8 +474,11 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, ret = blkcipher_walk_done(desc, walk, nbytes - n); } if (k < n) { - if (__ctr_paes_set_key(ctx) != 0) + if (__ctr_paes_set_key(ctx) != 0) { + if (locked) + spin_unlock(&ctrblk_lock); return blkcipher_walk_done(desc, walk, -EIO); + } } } if (locked) @@ -613,7 +616,7 @@ out_err: module_init(paes_s390_init); module_exit(paes_s390_fini); -MODULE_ALIAS_CRYPTO("aes-all"); +MODULE_ALIAS_CRYPTO("paes"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm with protected keys"); MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 5a3ec04a7082..3e47c4a0f18b 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -81,7 +81,7 @@ struct prng_ws_s { u64 byte_counter; }; -struct ppno_ws_s { +struct prno_ws_s { u32 res; u32 reseed_counter; u64 stream_bytes; @@ -93,7 +93,7 @@ struct prng_data_s { struct mutex mutex; union { struct prng_ws_s prngws; - struct ppno_ws_s ppnows; + struct prno_ws_s prnows; }; u8 *buf; u32 rest; @@ -306,12 +306,12 @@ static int __init prng_sha512_selftest(void) 0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 }; u8 buf[sizeof(random)]; - struct ppno_ws_s ws; + struct prno_ws_s ws; memset(&ws, 0, sizeof(ws)); /* initial seed */ - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, &ws, NULL, 0, seed, sizeof(seed)); /* check working states V and C */ @@ -324,9 +324,9 @@ static int __init prng_sha512_selftest(void) } /* generate random bytes */ - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &ws, buf, sizeof(buf), NULL, 0); - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &ws, buf, sizeof(buf), NULL, 0); /* check against expected data */ @@ -374,16 +374,16 @@ static int __init prng_sha512_instantiate(void) /* followed by 16 bytes of unique nonce */ get_tod_clock_ext(seed + 64 + 32); - /* initial seed of the ppno drng */ - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, - &prng_data->ppnows, NULL, 0, seed, sizeof(seed)); + /* initial seed of the prno drng */ + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, + &prng_data->prnows, NULL, 0, seed, sizeof(seed)); /* if fips mode is enabled, generate a first block of random bytes for the FIPS 140-2 Conditional Self Test */ if (fips_enabled) { prng_data->prev = prng_data->buf + prng_chunk_size; - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, - &prng_data->ppnows, + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, + &prng_data->prnows, prng_data->prev, prng_chunk_size, NULL, 0); } @@ -412,9 +412,9 @@ static int prng_sha512_reseed(void) if (ret != sizeof(seed)) return ret; - /* do a reseed of the ppno drng with this bytestring */ - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, - &prng_data->ppnows, NULL, 0, seed, sizeof(seed)); + /* do a reseed of the prno drng with this bytestring */ + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, + &prng_data->prnows, NULL, 0, seed, sizeof(seed)); return 0; } @@ -425,15 +425,15 @@ static int prng_sha512_generate(u8 *buf, size_t nbytes) int ret; /* reseed needed ? */ - if (prng_data->ppnows.reseed_counter > prng_reseed_limit) { + if (prng_data->prnows.reseed_counter > prng_reseed_limit) { ret = prng_sha512_reseed(); if (ret) return ret; } - /* PPNO generate */ - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, - &prng_data->ppnows, buf, nbytes, NULL, 0); + /* PRNO generate */ + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, + &prng_data->prnows, buf, nbytes, NULL, 0); /* FIPS 140-2 Conditional Self Test */ if (fips_enabled) { @@ -653,7 +653,7 @@ static ssize_t prng_counter_show(struct device *dev, if (mutex_lock_interruptible(&prng_data->mutex)) return -ERESTARTSYS; if (prng_mode == PRNG_MODE_SHA512) - counter = prng_data->ppnows.stream_bytes; + counter = prng_data->prnows.stream_bytes; else counter = prng_data->prngws.byte_counter; mutex_unlock(&prng_data->mutex); @@ -774,8 +774,8 @@ static int __init prng_init(void) /* choose prng mode */ if (prng_mode != PRNG_MODE_TDES) { - /* check for MSA5 support for PPNO operations */ - if (!cpacf_query_func(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) { + /* check for MSA5 support for PRNO operations */ + if (!cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) { if (prng_mode == PRNG_MODE_SHA512) { pr_err("The prng module cannot " "start in SHA-512 mode\n"); diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 68bfd09f1b02..97189dbaf34b 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -179,7 +179,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_KPROBES_SANITY_TEST=y diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 8aea32fe8bd2..45092b12f54f 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -1,8 +1,15 @@ generic-y += asm-offsets.h +generic-y += cacheflush.h generic-y += clkdev.h generic-y += dma-contiguous.h +generic-y += div64.h +generic-y += emergency-restart.h generic-y += export.h +generic-y += irq_regs.h generic-y += irq_work.h +generic-y += kmap_types.h +generic-y += local.h +generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += preempt.h diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h new file mode 100644 index 000000000000..6033901a40b2 --- /dev/null +++ b/arch/s390/include/asm/archrandom.h @@ -0,0 +1,69 @@ +/* + * Kernel interface for the s390 arch_random_* functions + * + * Copyright IBM Corp. 2017 + * + * Author: Harald Freudenberger <freude@de.ibm.com> + * + */ + +#ifndef _ASM_S390_ARCHRANDOM_H +#define _ASM_S390_ARCHRANDOM_H + +#ifdef CONFIG_ARCH_RANDOM + +#include <linux/static_key.h> +#include <linux/atomic.h> +#include <asm/cpacf.h> + +DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); +extern atomic64_t s390_arch_random_counter; + +static void s390_arch_random_generate(u8 *buf, unsigned int nbytes) +{ + cpacf_trng(NULL, 0, buf, nbytes); + atomic64_add(nbytes, &s390_arch_random_counter); +} + +static inline bool arch_has_random(void) +{ + if (static_branch_likely(&s390_arch_random_available)) + return true; + return false; +} + +static inline bool arch_has_random_seed(void) +{ + return arch_has_random(); +} + +static inline bool arch_get_random_long(unsigned long *v) +{ + if (static_branch_likely(&s390_arch_random_available)) { + s390_arch_random_generate((u8 *)v, sizeof(*v)); + return true; + } + return false; +} + +static inline bool arch_get_random_int(unsigned int *v) +{ + if (static_branch_likely(&s390_arch_random_available)) { + s390_arch_random_generate((u8 *)v, sizeof(*v)); + return true; + } + return false; +} + +static inline bool arch_get_random_seed_long(unsigned long *v) +{ + return arch_get_random_long(v); +} + +static inline bool arch_get_random_seed_int(unsigned int *v) +{ + return arch_get_random_int(v); +} + +#endif /* CONFIG_ARCH_RANDOM */ +#endif /* _ASM_S390_ARCHRANDOM_H */ diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index ac9e2b939d04..ba6d29412344 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -111,20 +111,22 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") static inline int __atomic_cmpxchg(int *ptr, int old, int new) { - asm volatile( - " cs %[old],%[new],%[ptr]" - : [old] "+d" (old), [ptr] "+Q" (*ptr) - : [new] "d" (new) : "cc", "memory"); - return old; + return __sync_val_compare_and_swap(ptr, old, new); +} + +static inline int __atomic_cmpxchg_bool(int *ptr, int old, int new) +{ + return __sync_bool_compare_and_swap(ptr, old, new); } static inline long __atomic64_cmpxchg(long *ptr, long old, long new) { - asm volatile( - " csg %[old],%[new],%[ptr]" - : [old] "+d" (old), [ptr] "+Q" (*ptr) - : [new] "d" (new) : "cc", "memory"); - return old; + return __sync_val_compare_and_swap(ptr, old, new); +} + +static inline long __atomic64_cmpxchg_bool(long *ptr, long old, long new) +{ + return __sync_bool_compare_and_swap(ptr, old, new); } #endif /* __ARCH_S390_ATOMIC_OPS__ */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index d92047da5ccb..99902b7b9f0c 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -15,14 +15,6 @@ * end up numbered: * |63..............0|127............64|191...........128|255...........192| * - * There are a few little-endian macros used mostly for filesystem - * bitmaps, these work on similar bit array layouts, but byte-oriented: - * |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56| - * - * The main difference is that bit 3-5 in the bit number field needs to be - * reversed compared to the big-endian bit fields. This can be achieved by - * XOR with 0x38. - * * We also have special functions which work with an MSB0 encoding. * The bits are numbered: * |0..............63|64............127|128...........191|192...........255| @@ -253,6 +245,11 @@ unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size); unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size, unsigned long offset); +#define for_each_set_bit_inv(bit, addr, size) \ + for ((bit) = find_first_bit_inv((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit_inv((addr), (size), (bit) + 1)) + static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr) { return set_bit(nr ^ (BITS_PER_LONG - 1), ptr); diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index bf90d1fd97a5..1bbd9dbfe4e0 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -46,8 +46,8 @@ unreachable(); \ } while (0) -#define __WARN_TAINT(taint) do { \ - __EMIT_BUG(BUGFLAG_TAINT(taint)); \ +#define __WARN_FLAGS(flags) do { \ + __EMIT_BUG(BUGFLAG_WARNING|(flags)); \ } while (0) #define WARN_ON(x) ({ \ diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index f7ed88cc066e..7a38ca85190b 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -33,6 +33,24 @@ struct ccw1 { __u32 cda; } __attribute__ ((packed,aligned(8))); +/** + * struct ccw0 - channel command word + * @cmd_code: command code + * @cda: data address + * @flags: flags, like IDA addressing, etc. + * @reserved: will be ignored + * @count: byte count + * + * The format-0 ccw structure. + */ +struct ccw0 { + __u8 cmd_code; + __u32 cda : 24; + __u8 flags; + __u8 reserved; + __u16 count; +} __packed __aligned(8); + #define CCW_FLAG_DC 0x80 #define CCW_FLAG_CC 0x40 #define CCW_FLAG_SLI 0x20 diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index e2dfbf280d12..e06f2556b316 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -25,7 +25,8 @@ #define CPACF_KMO 0xb92b /* MSA4 */ #define CPACF_PCC 0xb92c /* MSA4 */ #define CPACF_KMCTR 0xb92d /* MSA4 */ -#define CPACF_PPNO 0xb93c /* MSA5 */ +#define CPACF_PRNO 0xb93c /* MSA5 */ +#define CPACF_KMA 0xb929 /* MSA8 */ /* * En/decryption modifier bits @@ -123,12 +124,14 @@ #define CPACF_PCKMO_ENC_AES_256_KEY 0x14 /* - * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION) + * Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION) * instruction */ -#define CPACF_PPNO_QUERY 0x00 -#define CPACF_PPNO_SHA512_DRNG_GEN 0x03 -#define CPACF_PPNO_SHA512_DRNG_SEED 0x83 +#define CPACF_PRNO_QUERY 0x00 +#define CPACF_PRNO_SHA512_DRNG_GEN 0x03 +#define CPACF_PRNO_SHA512_DRNG_SEED 0x83 +#define CPACF_PRNO_TRNG_Q_R2C_RATIO 0x70 +#define CPACF_PRNO_TRNG 0x72 typedef struct { unsigned char bytes[16]; } cpacf_mask_t; @@ -149,8 +152,8 @@ static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask) asm volatile( " spm 0\n" /* pckmo doesn't change the cc */ - /* Parameter registers are ignored, but may not be 0 */ - "0: .insn rrf,%[opc] << 16,2,2,2,0\n" + /* Parameter regs are ignored, but must be nonzero and unique */ + "0: .insn rrf,%[opc] << 16,2,4,6,0\n" " brc 1,0b\n" /* handle partial completion */ : "=m" (*mask) : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode) @@ -173,7 +176,7 @@ static inline int __cpacf_check_opcode(unsigned int opcode) case CPACF_PCC: case CPACF_KMCTR: return test_facility(77); /* check for MSA4 */ - case CPACF_PPNO: + case CPACF_PRNO: return test_facility(57); /* check for MSA5 */ default: BUG(); @@ -373,18 +376,18 @@ static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest, } /** - * cpacf_ppno() - executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION) + * cpacf_prno() - executes the PRNO (PERFORM RANDOM NUMBER OPERATION) * instruction - * @func: the function code passed to PPNO; see CPACF_PPNO_xxx defines + * @func: the function code passed to PRNO; see CPACF_PRNO_xxx defines * @param: address of parameter block; see POP for details on each func * @dest: address of destination memory area * @dest_len: size of destination memory area in bytes * @seed: address of seed data * @seed_len: size of seed data in bytes */ -static inline void cpacf_ppno(unsigned long func, void *param, - u8 *dest, long dest_len, - const u8 *seed, long seed_len) +static inline void cpacf_prno(unsigned long func, void *param, + u8 *dest, unsigned long dest_len, + const u8 *seed, unsigned long seed_len) { register unsigned long r0 asm("0") = (unsigned long) func; register unsigned long r1 asm("1") = (unsigned long) param; @@ -398,7 +401,32 @@ static inline void cpacf_ppno(unsigned long func, void *param, " brc 1,0b\n" /* handle partial completion */ : [dst] "+a" (r2), [dlen] "+d" (r3) : [fc] "d" (r0), [pba] "a" (r1), - [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PPNO) + [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PRNO) + : "cc", "memory"); +} + +/** + * cpacf_trng() - executes the TRNG subfunction of the PRNO instruction + * @ucbuf: buffer for unconditioned data + * @ucbuf_len: amount of unconditioned data to fetch in bytes + * @cbuf: buffer for conditioned data + * @cbuf_len: amount of conditioned data to fetch in bytes + */ +static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len, + u8 *cbuf, unsigned long cbuf_len) +{ + register unsigned long r0 asm("0") = (unsigned long) CPACF_PRNO_TRNG; + register unsigned long r2 asm("2") = (unsigned long) ucbuf; + register unsigned long r3 asm("3") = (unsigned long) ucbuf_len; + register unsigned long r4 asm("4") = (unsigned long) cbuf; + register unsigned long r5 asm("5") = (unsigned long) cbuf_len; + + asm volatile ( + "0: .insn rre,%[opc] << 16,%[ucbuf],%[cbuf]\n" + " brc 1,0b\n" /* handle partial completion */ + : [ucbuf] "+a" (r2), [ucbuflen] "+d" (r3), + [cbuf] "+a" (r4), [cbuflen] "+d" (r5) + : [fc] "d" (r0), [opc] "i" (CPACF_PRNO) : "cc", "memory"); } diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index d1e0707310fd..05480e4cc5ca 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -20,9 +20,11 @@ #define CPU_MF_INT_SF_PRA (1 << 29) /* program request alert */ #define CPU_MF_INT_SF_SACA (1 << 23) /* sampler auth. change alert */ #define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */ +#define CPU_MF_INT_CF_MTDA (1 << 15) /* loss of MT ctr. data alert */ #define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */ #define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */ -#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA) +#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_MTDA|CPU_MF_INT_CF_CACA| \ + CPU_MF_INT_CF_LCDA) #define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \ CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ CPU_MF_INT_SF_LSDA) @@ -172,7 +174,7 @@ static inline int lcctl(u64 ctl) /* Extract CPU counter */ static inline int __ecctr(u64 ctr, u64 *content) { - register u64 _content asm("4") = 0; + u64 _content; int cc; asm volatile ( diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index d1c407ddf703..9072bf63a846 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,31 +8,27 @@ #define _S390_CPUTIME_H #include <linux/types.h> -#include <asm/div64.h> +#include <asm/timex.h> #define CPUTIME_PER_USEC 4096ULL #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC) /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ -typedef unsigned long long __nocast cputime_t; -typedef unsigned long long __nocast cputime64_t; - #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new) -static inline unsigned long __div(unsigned long long n, unsigned long base) -{ - return n / base; -} - /* - * Convert cputime to microseconds and back. + * Convert cputime to microseconds. */ -static inline unsigned int cputime_to_usecs(const cputime_t cputime) +static inline u64 cputime_to_usecs(const u64 cputime) { - return (__force unsigned long long) cputime >> 12; + return cputime >> 12; } +/* + * Convert cputime to nanoseconds. + */ +#define cputime_to_nsecs(cputime) tod_to_ns(cputime) u64 arch_cpu_idle_time(int cpu); diff --git a/arch/s390/include/asm/div64.h b/arch/s390/include/asm/div64.h deleted file mode 100644 index 6cd978cefb28..000000000000 --- a/arch/s390/include/asm/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/div64.h> diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 1d48880b3cc1..e8f623041769 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -105,6 +105,7 @@ #define HWCAP_S390_VXRS 2048 #define HWCAP_S390_VXRS_BCD 4096 #define HWCAP_S390_VXRS_EXT 8192 +#define HWCAP_S390_GS 16384 /* Internal bits, not exposed via elf */ #define HWCAP_INT_SIE 1UL diff --git a/arch/s390/include/asm/emergency-restart.h b/arch/s390/include/asm/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/arch/s390/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include <asm-generic/emergency-restart.h> - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h new file mode 100644 index 000000000000..16cfe2d62eeb --- /dev/null +++ b/arch/s390/include/asm/extable.h @@ -0,0 +1,28 @@ +#ifndef __S390_EXTABLE_H +#define __S390_EXTABLE_H +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry +{ + int insn, fixup; +}; + +static inline unsigned long extable_fixup(const struct exception_table_entry *x) +{ + return (unsigned long)&x->fixup + x->fixup; +} + +#define ARCH_HAS_RELATIVE_EXTABLE + +#endif diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 09b406db7529..cb60d5c5755d 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -8,14 +8,11 @@ #define __ASM_FACILITY_H #include <generated/facilities.h> - -#ifndef __ASSEMBLY__ - #include <linux/string.h> #include <linux/preempt.h> #include <asm/lowcore.h> -#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ +#define MAX_FACILITY_BIT (sizeof(((struct lowcore *)0)->stfle_fac_list) * 8) static inline int __test_facility(unsigned long nr, void *facilities) { @@ -72,5 +69,4 @@ static inline void stfle(u64 *stfle_fac_list, int size) preempt_enable(); } -#endif /* __ASSEMBLY__ */ #endif /* __ASM_FACILITY_H */ diff --git a/arch/s390/include/asm/irq_regs.h b/arch/s390/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/arch/s390/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/irq_regs.h> diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h index 68d7d68300f2..8a0b721a9b8d 100644 --- a/arch/s390/include/asm/isc.h +++ b/arch/s390/include/asm/isc.h @@ -16,6 +16,7 @@ #define CONSOLE_ISC 1 /* console I/O subchannel */ #define EADM_SCH_ISC 4 /* EADM subchannels */ #define CHSC_SCH_ISC 7 /* CHSC subchannels */ +#define VFIO_CCW_ISC IO_SCH_ISC /* VFIO-CCW I/O subchannels */ /* Adapter interrupts. */ #define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */ #define PCI_ISC 2 /* PCI I/O subchannels */ diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h deleted file mode 100644 index 0a88622339ee..000000000000 --- a/arch/s390/include/asm/kmap_types.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - -#include <asm-generic/kmap_types.h> - -#endif diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a41faf34b034..426614a882a9 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include <asm/cpu.h> #include <asm/fpu/api.h> #include <asm/isc.h> +#include <asm/guarded_storage.h> #define KVM_S390_BSCA_CPU_SLOTS 64 #define KVM_S390_ESCA_CPU_SLOTS 248 @@ -121,6 +122,7 @@ struct esca_block { #define CPUSTAT_SLSR 0x00002000 #define CPUSTAT_ZARCH 0x00000800 #define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_KSS 0x00000200 #define CPUSTAT_SM 0x00000080 #define CPUSTAT_IBS 0x00000040 #define CPUSTAT_GED2 0x00000010 @@ -164,16 +166,27 @@ struct kvm_s390_sie_block { #define ICTL_RRBE 0x00001000 #define ICTL_TPROT 0x00000200 __u32 ictl; /* 0x0048 */ +#define ECA_CEI 0x80000000 +#define ECA_IB 0x40000000 +#define ECA_SIGPI 0x10000000 +#define ECA_MVPGI 0x01000000 +#define ECA_VX 0x00020000 +#define ECA_PROTEXCI 0x00002000 +#define ECA_SII 0x00000001 __u32 eca; /* 0x004c */ #define ICPT_INST 0x04 #define ICPT_PROGI 0x08 #define ICPT_INSTPROGI 0x0C +#define ICPT_EXTREQ 0x10 #define ICPT_EXTINT 0x14 +#define ICPT_IOREQ 0x18 +#define ICPT_WAIT 0x1c #define ICPT_VALIDITY 0x20 #define ICPT_STOP 0x28 #define ICPT_OPEREXC 0x2C #define ICPT_PARTEXEC 0x38 #define ICPT_IOINST 0x40 +#define ICPT_KSS 0x5c __u8 icptcode; /* 0x0050 */ __u8 icptstatus; /* 0x0051 */ __u16 ihcpu; /* 0x0052 */ @@ -182,10 +195,19 @@ struct kvm_s390_sie_block { __u32 ipb; /* 0x0058 */ __u32 scaoh; /* 0x005c */ __u8 reserved60; /* 0x0060 */ +#define ECB_GS 0x40 +#define ECB_TE 0x10 +#define ECB_SRSI 0x04 +#define ECB_HOSTPROTINT 0x02 __u8 ecb; /* 0x0061 */ +#define ECB2_CMMA 0x80 +#define ECB2_IEP 0x20 +#define ECB2_PFMFI 0x08 +#define ECB2_ESCA 0x04 __u8 ecb2; /* 0x0062 */ -#define ECB3_AES 0x04 #define ECB3_DEA 0x08 +#define ECB3_AES 0x04 +#define ECB3_RI 0x01 __u8 ecb3; /* 0x0063 */ __u32 scaol; /* 0x0064 */ __u8 reserved68[4]; /* 0x0068 */ @@ -219,11 +241,14 @@ struct kvm_s390_sie_block { __u32 crycbd; /* 0x00fc */ __u64 gcr[16]; /* 0x0100 */ __u64 gbea; /* 0x0180 */ - __u8 reserved188[24]; /* 0x0188 */ + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ __u32 fac; /* 0x01a0 */ __u8 reserved1a4[20]; /* 0x01a4 */ __u64 cbrlo; /* 0x01b8 */ __u8 reserved1c0[8]; /* 0x01c0 */ +#define ECD_HOSTREGMGMT 0x20000000 __u32 ecd; /* 0x01c8 */ __u8 reserved1cc[18]; /* 0x01cc */ __u64 pp; /* 0x01de */ @@ -498,6 +523,12 @@ struct kvm_s390_local_interrupt { #define FIRQ_CNTR_PFAULT 3 #define FIRQ_MAX_COUNT 4 +/* mask the AIS mode for a given ISC */ +#define AIS_MODE_MASK(isc) (0x80 >> isc) + +#define KVM_S390_AIS_MODE_ALL 0 +#define KVM_S390_AIS_MODE_SINGLE 1 + struct kvm_s390_float_interrupt { unsigned long pending_irqs; spinlock_t lock; @@ -507,6 +538,10 @@ struct kvm_s390_float_interrupt { struct kvm_s390_ext_info srv_signal; int next_rr_cpu; unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + struct mutex ais_lock; + u8 simm; + u8 nimm; + int ais_enabled; }; struct kvm_hw_wp_info_arch { @@ -554,6 +589,7 @@ struct kvm_vcpu_arch { /* if vsie is active, currently executed shadow sie control block */ struct kvm_s390_sie_block *vsie_block; unsigned int host_acrs[NUM_ACRS]; + struct gs_cb *host_gscb; struct fpu host_fpregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; @@ -574,6 +610,7 @@ struct kvm_vcpu_arch { */ seqcount_t cputm_seqcount; __u64 cputm_start; + bool gs_enabled; }; struct kvm_vm_stat { @@ -596,6 +633,7 @@ struct s390_io_adapter { bool maskable; bool masked; bool swap; + bool suppressible; struct rw_semaphore maps_lock; struct list_head maps; atomic_t nr_maps; diff --git a/arch/s390/include/asm/local.h b/arch/s390/include/asm/local.h deleted file mode 100644 index c11c530f74d0..000000000000 --- a/arch/s390/include/asm/local.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local.h> diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/s390/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local64.h> diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 61261e0e95c0..8a5b082797f8 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -157,8 +157,8 @@ struct lowcore { __u64 stfle_fac_list[32]; /* 0x0f00 */ __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ - /* Pointer to vector register save area */ - __u64 vector_save_area_addr; /* 0x11b0 */ + /* Pointer to the machine check extended save area */ + __u64 mcesad; /* 0x11b0 */ /* 64 bit extparam used for pfault/diag 250: defined by architecture */ __u64 ext_params2; /* 0x11B8 */ @@ -182,10 +182,7 @@ struct lowcore { /* Transaction abort diagnostic block */ __u8 pgm_tdb[256]; /* 0x1800 */ - __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */ - - /* Software defined save area for vector registers */ - __u8 vector_save_area[1024]; /* 0x1c00 */ + __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ } __packed; #define S390_lowcore (*((struct lowcore *) 0)) diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h index b55a59e1d134..b79813d9cf68 100644 --- a/arch/s390/include/asm/mman.h +++ b/arch/s390/include/asm/mman.h @@ -8,8 +8,4 @@ #include <uapi/asm/mman.h> -#ifndef __ASSEMBLY__ -int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags); -#define arch_mmap_check(addr, len, flags) s390_mmap_check(addr, len, flags) -#endif #endif /* __S390_MMAN_H__ */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index bea785d7f853..bd6f30304518 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -22,6 +22,8 @@ typedef struct { unsigned int has_pgste:1; /* The mmu context uses storage keys. */ unsigned int use_skey:1; + /* The mmu context uses CMMA. */ + unsigned int use_cmma:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 6e31d87fb669..8712e11bead4 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -28,6 +28,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.alloc_pgste = page_table_allocate_pgste; mm->context.has_pgste = 0; mm->context.use_skey = 0; + mm->context.use_cmma = 0; #endif switch (mm->context.asce_limit) { case 1UL << 42: @@ -156,10 +157,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, /* by default, allow everything */ return true; } - -static inline bool arch_pte_access_permitted(pte_t pte, bool write) -{ - /* by default, allow everything */ - return true; -} #endif /* __S390_MMU_CONTEXT_H */ diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index b75fd910386a..e3e8895f5d3e 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -58,7 +58,9 @@ union mci { u64 ie : 1; /* 32 indirect storage error */ u64 ar : 1; /* 33 access register validity */ u64 da : 1; /* 34 delayed access exception */ - u64 : 7; /* 35-41 */ + u64 : 1; /* 35 */ + u64 gs : 1; /* 36 guarded storage registers */ + u64 : 5; /* 37-41 */ u64 pr : 1; /* 42 tod programmable register validity */ u64 fc : 1; /* 43 fp control register validity */ u64 ap : 1; /* 44 ancillary report */ @@ -69,6 +71,14 @@ union mci { }; }; +#define MCESA_ORIGIN_MASK (~0x3ffUL) +#define MCESA_LC_MASK (0xfUL) + +struct mcesa { + u8 vector_save_area[1024]; + u8 guarded_storage_save_area[32]; +}; + struct pt_regs; extern void s390_handle_mcck(void); diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h new file mode 100644 index 000000000000..42267a2fe29e --- /dev/null +++ b/arch/s390/include/asm/page-states.h @@ -0,0 +1,19 @@ +/* + * Copyright IBM Corp. 2017 + * Author(s): Claudio Imbrenda <imbrenda@linux.vnet.ibm.com> + */ + +#ifndef PAGE_STATES_H +#define PAGE_STATES_H + +#define ESSA_GET_STATE 0 +#define ESSA_SET_STABLE 1 +#define ESSA_SET_UNUSED 2 +#define ESSA_SET_VOLATILE 3 +#define ESSA_SET_POT_VOLATILE 4 +#define ESSA_SET_STABLE_RESIDENT 5 +#define ESSA_SET_STABLE_IF_RESIDENT 6 + +#define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT + +#endif diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index c64c0befd3f3..dd32beb9d30c 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -1,7 +1,7 @@ /* * Performance event support - s390 specific definitions. * - * Copyright IBM Corp. 2009, 2013 + * Copyright IBM Corp. 2009, 2017 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ @@ -47,7 +47,7 @@ struct perf_sf_sde_regs { }; /* Perf PMU definitions for the counter facility */ -#define PERF_CPUM_CF_MAX_CTR 256 +#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */ /* Perf PMU definitions for the sampling facility */ #define PERF_CPUM_SF_MAX_CTR 2 diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7ed1972b1920..e6e3b887bee3 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -24,6 +24,7 @@ * the S390 page table tree. */ #ifndef __ASSEMBLY__ +#include <asm-generic/5level-fixup.h> #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/page-flags.h> @@ -371,10 +372,12 @@ static inline int is_module_addr(void *addr) #define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */ /* Guest Page State used for virtualization */ -#define _PGSTE_GPS_ZERO 0x0000000080000000UL -#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL -#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL -#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL +#define _PGSTE_GPS_ZERO 0x0000000080000000UL +#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL +#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL +#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL +#define _PGSTE_GPS_USAGE_POT_VOLATILE 0x0000000002000000UL +#define _PGSTE_GPS_USAGE_VOLATILE _PGSTE_GPS_USAGE_MASK /* * A user page table pointer has the space-switch-event bit, the @@ -1040,6 +1043,12 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr); int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key); +int set_pgste_bits(struct mm_struct *mm, unsigned long addr, + unsigned long bits, unsigned long value); +int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep); +int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, + unsigned long *oldpte, unsigned long *oldpgste); + /* * Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following @@ -1050,6 +1059,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, { if (!MACHINE_HAS_NX) pte_val(entry) &= ~_PAGE_NOEXEC; + if (pte_present(entry)) + pte_val(entry) &= ~_PAGE_UNUSED; if (mm_has_pgste(mm)) ptep_set_pte_at(mm, addr, ptep, entry); else diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h index b48aef4188f6..4c484590d858 100644 --- a/arch/s390/include/asm/pkey.h +++ b/arch/s390/include/asm/pkey.h @@ -87,4 +87,25 @@ int pkey_findcard(const struct pkey_seckey *seckey, int pkey_skey2pkey(const struct pkey_seckey *seckey, struct pkey_protkey *protkey); +/* + * Verify the given secure key for being able to be useable with + * the pkey module. Check for correct key type and check for having at + * least one crypto card being able to handle this key (master key + * or old master key verification pattern matches). + * Return some info about the key: keysize in bits, keytype (currently + * only AES), flag if key is wrapped with an old MKVP. + * @param seckey pointer to buffer with the input secure key + * @param pcardnr pointer to cardnr, receives the card number on success + * @param pdomain pointer to domain, receives the domain number on success + * @param pkeysize pointer to keysize, receives the bitsize of the key + * @param pattributes pointer to attributes, receives additional info + * PKEY_VERIFY_ATTR_AES if the key is an AES key + * PKEY_VERIFY_ATTR_OLD_MKVP if key has old mkvp stored in + * @return 0 on success, negative errno value on failure. If no card could + * be found which is able to handle this key, -ENODEV is returned. + */ +int pkey_verifykey(const struct pkey_seckey *seckey, + u16 *pcardnr, u16 *pdomain, + u16 *pkeysize, u32 *pattributes); + #endif /* _KAPI_PKEY_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index e4988710aa86..60d395fdc864 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -91,14 +91,15 @@ extern void execve_tail(void); * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. */ -#define TASK_SIZE_OF(tsk) ((tsk)->mm ? \ - (tsk)->mm->context.asce_limit : TASK_MAX_SIZE) +#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \ + (1UL << 31) : (1UL << 53)) #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ (1UL << 30) : (1UL << 41)) #define TASK_SIZE TASK_SIZE_OF(current) -#define TASK_MAX_SIZE (1UL << 53) +#define TASK_SIZE_MAX (1UL << 53) -#define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:42)) +#define STACK_TOP (test_thread_flag(TIF_31BIT) ? \ + (1UL << 31) : (1UL << 42)) #define STACK_TOP_MAX (1UL << 42) #define HAVE_ARCH_PICK_MMAP_LAYOUT @@ -135,6 +136,8 @@ struct thread_struct { struct list_head list; /* cpu runtime instrumentation */ struct runtime_instr_cb *ri_cb; + struct gs_cb *gs_cb; /* Current guarded storage cb */ + struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ /* * Warning: 'fpu' is dynamically-sized. It *MUST* be at @@ -215,6 +218,9 @@ void show_cacheinfo(struct seq_file *m); /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); +/* Free guarded storage control block for current */ +void exit_thread_gs(void); + /* * Return saved PC of a blocked thread. */ diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index ace3bd315438..6f5167bc1928 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -75,6 +75,7 @@ struct sclp_info { unsigned char has_pfmfi : 1; unsigned char has_ibs : 1; unsigned char has_skey : 1; + unsigned char has_kss : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 5ce29fe100ba..fbd9116eb17b 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,6 +4,5 @@ #include <asm-generic/sections.h> extern char _eshared[], _ehead[]; -extern char __start_ro_after_init[], __end_ro_after_init[]; #endif diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/set_memory.h index 0499334f9473..46a4db44c47a 100644 --- a/arch/s390/include/asm/cacheflush.h +++ b/arch/s390/include/asm/set_memory.h @@ -1,8 +1,5 @@ -#ifndef _S390_CACHEFLUSH_H -#define _S390_CACHEFLUSH_H - -/* Caches aren't brain-dead on the s390. */ -#include <asm-generic/cacheflush.h> +#ifndef _ASMS390_SET_MEMORY_H +#define _ASMS390_SET_MEMORY_H #define SET_MEMORY_RO 1UL #define SET_MEMORY_RW 2UL @@ -31,4 +28,4 @@ static inline int set_memory_x(unsigned long addr, int numpages) return __set_memory(addr, numpages, SET_MEMORY_X); } -#endif /* _S390_CACHEFLUSH_H */ +#endif diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 30bdb5a027f3..cd78155b1829 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -29,8 +29,8 @@ #define MACHINE_FLAG_TE _BITUL(11) #define MACHINE_FLAG_TLB_LC _BITUL(12) #define MACHINE_FLAG_VX _BITUL(13) -#define MACHINE_FLAG_CAD _BITUL(14) -#define MACHINE_FLAG_NX _BITUL(15) +#define MACHINE_FLAG_NX _BITUL(14) +#define MACHINE_FLAG_GS _BITUL(15) #define LPP_MAGIC _BITUL(31) #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) @@ -68,8 +68,8 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) -#define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD) #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) +#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) /* * Console mode. Override with conmode= diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h index 487428b6d099..334e279f1bce 100644 --- a/arch/s390/include/asm/sparsemem.h +++ b/arch/s390/include/asm/sparsemem.h @@ -2,6 +2,6 @@ #define _ASM_S390_SPARSEMEM_H #define SECTION_SIZE_BITS 28 -#define MAX_PHYSMEM_BITS 46 +#define MAX_PHYSMEM_BITS CONFIG_MAX_PHYSMEM_BITS #endif /* _ASM_S390_SPARSEMEM_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index ffc45048ea7d..f7838ecd83c6 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -10,6 +10,7 @@ #define __ASM_SPINLOCK_H #include <linux/smp.h> +#include <asm/atomic_ops.h> #include <asm/barrier.h> #include <asm/processor.h> @@ -17,12 +18,6 @@ extern int spin_retry; -static inline int -_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new) -{ - return __sync_bool_compare_and_swap(lock, old, new); -} - #ifndef CONFIG_SMP static inline bool arch_vcpu_is_preempted(int cpu) { return false; } #else @@ -40,7 +35,7 @@ bool arch_vcpu_is_preempted(int cpu); * (the type definitions are in asm/spinlock_types.h) */ -void arch_lock_relax(unsigned int cpu); +void arch_lock_relax(int cpu); void arch_spin_lock_wait(arch_spinlock_t *); int arch_spin_trylock_retry(arch_spinlock_t *); @@ -70,7 +65,7 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp) { barrier(); return likely(arch_spin_value_unlocked(*lp) && - _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL)); + __atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL)); } static inline void arch_spin_lock(arch_spinlock_t *lp) @@ -95,7 +90,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp) static inline void arch_spin_unlock(arch_spinlock_t *lp) { - typecheck(unsigned int, lp->lock); + typecheck(int, lp->lock); asm volatile( "st %1,%0\n" : "+Q" (lp->lock) @@ -141,16 +136,16 @@ extern int _raw_write_trylock_retry(arch_rwlock_t *lp); static inline int arch_read_trylock_once(arch_rwlock_t *rw) { - unsigned int old = ACCESS_ONCE(rw->lock); - return likely((int) old >= 0 && - _raw_compare_and_swap(&rw->lock, old, old + 1)); + int old = ACCESS_ONCE(rw->lock); + return likely(old >= 0 && + __atomic_cmpxchg_bool(&rw->lock, old, old + 1)); } static inline int arch_write_trylock_once(arch_rwlock_t *rw) { - unsigned int old = ACCESS_ONCE(rw->lock); + int old = ACCESS_ONCE(rw->lock); return likely(old == 0 && - _raw_compare_and_swap(&rw->lock, 0, 0x80000000)); + __atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000)); } #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -161,9 +156,9 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw) #define __RAW_LOCK(ptr, op_val, op_string) \ ({ \ - unsigned int old_val; \ + int old_val; \ \ - typecheck(unsigned int *, ptr); \ + typecheck(int *, ptr); \ asm volatile( \ op_string " %0,%2,%1\n" \ "bcr 14,0\n" \ @@ -175,9 +170,9 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw) #define __RAW_UNLOCK(ptr, op_val, op_string) \ ({ \ - unsigned int old_val; \ + int old_val; \ \ - typecheck(unsigned int *, ptr); \ + typecheck(int *, ptr); \ asm volatile( \ op_string " %0,%2,%1\n" \ : "=d" (old_val), "+Q" (*ptr) \ @@ -187,14 +182,14 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw) }) extern void _raw_read_lock_wait(arch_rwlock_t *lp); -extern void _raw_write_lock_wait(arch_rwlock_t *lp, unsigned int prev); +extern void _raw_write_lock_wait(arch_rwlock_t *lp, int prev); static inline void arch_read_lock(arch_rwlock_t *rw) { - unsigned int old; + int old; old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD); - if ((int) old < 0) + if (old < 0) _raw_read_lock_wait(rw); } @@ -205,7 +200,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) static inline void arch_write_lock(arch_rwlock_t *rw) { - unsigned int old; + int old; old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); if (old != 0) @@ -232,11 +227,11 @@ static inline void arch_read_lock(arch_rwlock_t *rw) static inline void arch_read_unlock(arch_rwlock_t *rw) { - unsigned int old; + int old; do { old = ACCESS_ONCE(rw->lock); - } while (!_raw_compare_and_swap(&rw->lock, old, old - 1)); + } while (!__atomic_cmpxchg_bool(&rw->lock, old, old - 1)); } static inline void arch_write_lock(arch_rwlock_t *rw) @@ -248,7 +243,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) static inline void arch_write_unlock(arch_rwlock_t *rw) { - typecheck(unsigned int, rw->lock); + typecheck(int, rw->lock); rw->owner = 0; asm volatile( diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index d84b6939237c..fe755eec275f 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -6,14 +6,14 @@ #endif typedef struct { - unsigned int lock; + int lock; } __attribute__ ((aligned (4))) arch_spinlock_t; #define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, } typedef struct { - unsigned int lock; - unsigned int owner; + int lock; + int owner; } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED { 0 } diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 12d45f0cfdd9..f6c2b5814ab0 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -10,6 +10,7 @@ #include <linux/thread_info.h> #include <asm/fpu/api.h> #include <asm/ptrace.h> +#include <asm/guarded_storage.h> extern struct task_struct *__switch_to(void *, void *); extern void update_cr_regs(struct task_struct *task); @@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs) save_fpu_regs(); \ save_access_regs(&prev->thread.acrs[0]); \ save_ri_cb(prev->thread.ri_cb); \ + save_gs_cb(prev->thread.gs_cb); \ } \ if (next->mm) { \ update_cr_regs(next); \ set_cpu_flag(CIF_FPU); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ + restore_gs_cb(next->thread.gs_cb); \ } \ prev = __switch_to(prev,next); \ } while (0) diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index 229326c942c7..73bff45ced55 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -142,7 +142,15 @@ struct sysinfo_3_2_2 { extern int topology_max_mnest; -#define TOPOLOGY_CORE_BITS 64 +/* + * Returns the maximum nesting level supported by the cpu topology code. + * The current maximum level is 4 which is the drawer level. + */ +static inline int topology_mnest_limit(void) +{ + return min(topology_max_mnest, 4); +} + #define TOPOLOGY_NR_MAG 6 struct topology_core { @@ -152,7 +160,7 @@ struct topology_core { unsigned char pp:2; unsigned char reserved1; unsigned short origin; - unsigned long mask[TOPOLOGY_CORE_BITS / BITS_PER_LONG]; + unsigned long mask; }; struct topology_container { diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index a5b54a445eb8..0b3ee083a665 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -51,14 +51,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); /* * thread information flags bit numbers */ +/* _TIF_WORK bits */ #define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SYSCALL_TRACE 3 /* syscall trace active */ -#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ -#define TIF_SECCOMP 5 /* secure computing */ -#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ -#define TIF_UPROBE 7 /* breakpointed or single-stepping */ +#define TIF_UPROBE 3 /* breakpointed or single-stepping */ +#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ +#define TIF_PATCH_PENDING 5 /* pending live patching update */ + #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ @@ -66,15 +66,25 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define TIF_BLOCK_STEP 20 /* This task is block stepped */ #define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */ +/* _TIF_TRACE bits */ +#define TIF_SYSCALL_TRACE 24 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */ +#define TIF_SECCOMP 26 /* secure computing */ +#define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */ + #define _TIF_NOTIFY_RESUME _BITUL(TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING _BITUL(TIF_SIGPENDING) #define _TIF_NEED_RESCHED _BITUL(TIF_NEED_RESCHED) +#define _TIF_UPROBE _BITUL(TIF_UPROBE) +#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE) +#define _TIF_PATCH_PENDING _BITUL(TIF_PATCH_PENDING) + +#define _TIF_31BIT _BITUL(TIF_31BIT) +#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) + #define _TIF_SYSCALL_TRACE _BITUL(TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT _BITUL(TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP _BITUL(TIF_SECCOMP) #define _TIF_SYSCALL_TRACEPOINT _BITUL(TIF_SYSCALL_TRACEPOINT) -#define _TIF_UPROBE _BITUL(TIF_UPROBE) -#define _TIF_31BIT _BITUL(TIF_31BIT) -#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 354344dcc198..118535123f34 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -206,20 +206,16 @@ static inline unsigned long long get_tod_clock_monotonic(void) * ns = (todval * 125) >> 9; * * In order to avoid an overflow with the multiplication we can rewrite this. - * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits) + * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits) * we end up with * - * ns = ((2^32 * th + tl) * 125 ) >> 9; - * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9); + * ns = ((2^9 * th + tl) * 125 ) >> 9; + * -> ns = (th * 125) + ((tl * 125) >> 9); * */ static inline unsigned long long tod_to_ns(unsigned long long todval) { - unsigned long long ns; - - ns = ((todval >> 32) << 23) * 125; - ns += ((todval & 0xffffffff) * 125) >> 9; - return ns; + return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } #endif diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 136932ff4250..78f3f093d143 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -12,13 +12,9 @@ /* * User space memory access functions */ -#include <linux/sched.h> -#include <linux/errno.h> #include <asm/processor.h> #include <asm/ctl_reg.h> - -#define VERIFY_READ 0 -#define VERIFY_WRITE 1 +#include <asm/extable.h> /* @@ -42,7 +38,7 @@ static inline void set_fs(mm_segment_t fs) { current->thread.mm_segment = fs; - if (segment_eq(fs, KERNEL_DS)) { + if (uaccess_kernel()) { set_cpu_flag(CIF_ASCE_SECONDARY); __ctl_load(S390_lowcore.kernel_asce, 7, 7); } else { @@ -64,72 +60,14 @@ static inline int __range_ok(unsigned long addr, unsigned long size) #define access_ok(type, addr, size) __access_ok(addr, size) -/* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. - * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. - */ - -struct exception_table_entry -{ - int insn, fixup; -}; - -static inline unsigned long extable_fixup(const struct exception_table_entry *x) -{ - return (unsigned long)&x->fixup + x->fixup; -} - -#define ARCH_HAS_RELATIVE_EXTABLE - -/** - * __copy_from_user: - Copy a block of data from user space, with less checking. - * @to: Destination address, in kernel space. - * @from: Source address, in user space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * Copy data from user space to kernel space. Caller must check - * the specified block with access_ok() before calling this function. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - * - * If some data could not be copied, this function will pad the copied - * data to the requested size using zero bytes. - */ -unsigned long __must_check __copy_from_user(void *to, const void __user *from, - unsigned long n); +unsigned long __must_check +raw_copy_from_user(void *to, const void __user *from, unsigned long n); -/** - * __copy_to_user: - Copy a block of data into user space, with less checking. - * @to: Destination address, in user space. - * @from: Source address, in kernel space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * Copy data from kernel space to user space. Caller must check - * the specified block with access_ok() before calling this function. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - */ -unsigned long __must_check __copy_to_user(void __user *to, const void *from, - unsigned long n); +unsigned long __must_check +raw_copy_to_user(void __user *to, const void *from, unsigned long n); -#define __copy_to_user_inatomic __copy_to_user -#define __copy_from_user_inatomic __copy_from_user +#define INLINE_COPY_FROM_USER +#define INLINE_COPY_TO_USER #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES @@ -147,7 +85,7 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from, " jg 2b\n" \ ".popsection\n" \ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ - : "=d" (__rc), "=Q" (*(to)) \ + : "=d" (__rc), "+Q" (*(to)) \ : "d" (size), "Q" (*(from)), \ "d" (__reg0), "K" (-EFAULT) \ : "cc"); \ @@ -218,13 +156,13 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { - size = __copy_to_user(ptr, x, size); + size = raw_copy_to_user(ptr, x, size); return size ? -EFAULT : 0; } static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { - size = __copy_from_user(x, ptr, size); + size = raw_copy_from_user(x, ptr, size); return size ? -EFAULT : 0; } @@ -314,77 +252,8 @@ int __get_user_bad(void) __attribute__((noreturn)); #define __put_user_unaligned __put_user #define __get_user_unaligned __get_user -extern void __compiletime_error("usercopy buffer size is too small") -__bad_copy_user(void); - -static inline void copy_user_overflow(int size, unsigned long count) -{ - WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); -} - -/** - * copy_to_user: - Copy a block of data into user space. - * @to: Destination address, in user space. - * @from: Source address, in kernel space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * Copy data from kernel space to user space. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - */ -static inline unsigned long __must_check -copy_to_user(void __user *to, const void *from, unsigned long n) -{ - might_fault(); - return __copy_to_user(to, from, n); -} - -/** - * copy_from_user: - Copy a block of data from user space. - * @to: Destination address, in kernel space. - * @from: Source address, in user space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * Copy data from user space to kernel space. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - * - * If some data could not be copied, this function will pad the copied - * data to the requested size using zero bytes. - */ -static inline unsigned long __must_check -copy_from_user(void *to, const void __user *from, unsigned long n) -{ - unsigned int sz = __compiletime_object_size(to); - - might_fault(); - if (unlikely(sz != -1 && sz < n)) { - if (!__builtin_constant_p(n)) - copy_user_overflow(sz, n); - else - __bad_copy_user(); - return n; - } - return __copy_from_user(to, from, n); -} - unsigned long __must_check -__copy_in_user(void __user *to, const void __user *from, unsigned long n); - -static inline unsigned long __must_check -copy_in_user(void __user *to, const void __user *from, unsigned long n) -{ - might_fault(); - return __copy_in_user(to, from, n); -} +raw_copy_in_user(void __user *to, const void __user *from, unsigned long n); /* * Copy a null terminated string from userspace. diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 6848ba5c1454..addb09cee0f5 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -1,6 +1,16 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += errno.h +generic-y += fcntl.h +generic-y += ioctl.h +generic-y += mman.h +generic-y += param.h +generic-y += poll.h +generic-y += resource.h +generic-y += sockios.h +generic-y += termbits.h + header-y += auxvec.h header-y += bitsperlong.h header-y += byteorder.h @@ -11,25 +21,20 @@ header-y += cmb.h header-y += dasd.h header-y += debug.h header-y += errno.h -header-y += fcntl.h +header-y += guarded_storage.h header-y += hypfs.h -header-y += ioctl.h header-y += ioctls.h header-y += ipcbuf.h header-y += kvm.h header-y += kvm_para.h header-y += kvm_perf.h header-y += kvm_virtio.h -header-y += mman.h header-y += monwriter.h header-y += msgbuf.h -header-y += param.h header-y += pkey.h -header-y += poll.h header-y += posix_types.h header-y += ptrace.h header-y += qeth.h -header-y += resource.h header-y += schid.h header-y += sclp_ctl.h header-y += sembuf.h @@ -40,12 +45,10 @@ header-y += sigcontext.h header-y += siginfo.h header-y += signal.h header-y += socket.h -header-y += sockios.h header-y += stat.h header-y += statfs.h header-y += swab.h header-y += tape390.h -header-y += termbits.h header-y += termios.h header-y += types.h header-y += ucontext.h diff --git a/arch/s390/include/uapi/asm/errno.h b/arch/s390/include/uapi/asm/errno.h deleted file mode 100644 index 395e97d8005e..000000000000 --- a/arch/s390/include/uapi/asm/errno.h +++ /dev/null @@ -1,11 +0,0 @@ -/* - * S390 version - * - */ - -#ifndef _S390_ERRNO_H -#define _S390_ERRNO_H - -#include <asm-generic/errno.h> - -#endif diff --git a/arch/s390/include/uapi/asm/fcntl.h b/arch/s390/include/uapi/asm/fcntl.h deleted file mode 100644 index 46ab12db5739..000000000000 --- a/arch/s390/include/uapi/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fcntl.h> diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h new file mode 100644 index 000000000000..852850e8e17e --- /dev/null +++ b/arch/s390/include/uapi/asm/guarded_storage.h @@ -0,0 +1,77 @@ +#ifndef _GUARDED_STORAGE_H +#define _GUARDED_STORAGE_H + +#include <linux/types.h> + +struct gs_cb { + __u64 reserved; + __u64 gsd; + __u64 gssm; + __u64 gs_epl_a; +}; + +struct gs_epl { + __u8 pad1; + union { + __u8 gs_eam; + struct { + __u8 : 6; + __u8 e : 1; + __u8 b : 1; + }; + }; + union { + __u8 gs_eci; + struct { + __u8 tx : 1; + __u8 cx : 1; + __u8 : 5; + __u8 in : 1; + }; + }; + union { + __u8 gs_eai; + struct { + __u8 : 1; + __u8 t : 1; + __u8 as : 2; + __u8 ar : 4; + }; + }; + __u32 pad2; + __u64 gs_eha; + __u64 gs_eia; + __u64 gs_eoa; + __u64 gs_eir; + __u64 gs_era; +}; + +#define GS_ENABLE 0 +#define GS_DISABLE 1 +#define GS_SET_BC_CB 2 +#define GS_CLEAR_BC_CB 3 +#define GS_BROADCAST 4 + +static inline void load_gs_cb(struct gs_cb *gs_cb) +{ + asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb)); +} + +static inline void store_gs_cb(struct gs_cb *gs_cb) +{ + asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb)); +} + +static inline void save_gs_cb(struct gs_cb *gs_cb) +{ + if (gs_cb) + store_gs_cb(gs_cb); +} + +static inline void restore_gs_cb(struct gs_cb *gs_cb) +{ + if (gs_cb) + load_gs_cb(gs_cb); +} + +#endif /* _GUARDED_STORAGE_H */ diff --git a/arch/s390/include/uapi/asm/ioctl.h b/arch/s390/include/uapi/asm/ioctl.h deleted file mode 100644 index b279fe06dfe5..000000000000 --- a/arch/s390/include/uapi/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctl.h> diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index a2ffec4139ad..3dd2a1d308dd 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -26,6 +26,8 @@ #define KVM_DEV_FLIC_ADAPTER_REGISTER 6 #define KVM_DEV_FLIC_ADAPTER_MODIFY 7 #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 +#define KVM_DEV_FLIC_AISM 9 +#define KVM_DEV_FLIC_AIRQ_INJECT 10 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. @@ -41,7 +43,14 @@ struct kvm_s390_io_adapter { __u8 isc; __u8 maskable; __u8 swap; - __u8 pad; + __u8 flags; +}; + +#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01 + +struct kvm_s390_ais_req { + __u8 isc; + __u16 mode; }; #define KVM_S390_IO_ADAPTER_MASK 1 @@ -110,6 +119,7 @@ struct kvm_s390_vm_cpu_machine { #define KVM_S390_VM_CPU_FEAT_CMMA 10 #define KVM_S390_VM_CPU_FEAT_PFMFI 11 #define KVM_S390_VM_CPU_FEAT_SIGPIF 12 +#define KVM_S390_VM_CPU_FEAT_KSS 13 struct kvm_s390_vm_cpu_feat { __u64 feat[16]; }; @@ -131,7 +141,8 @@ struct kvm_s390_vm_cpu_subfunc { __u8 kmo[16]; /* with MSA4 */ __u8 pcc[16]; /* with MSA4 */ __u8 ppno[16]; /* with MSA5 */ - __u8 reserved[1824]; + __u8 kma[16]; /* with MSA8 */ + __u8 reserved[1808]; }; /* kvm attributes for crypto */ @@ -197,6 +208,10 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_VRS (1UL << 6) #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) +#define KVM_SYNC_GSCB (1UL << 9) +/* length and alignment of the sdnx as a power of two */ +#define SDNXC 8 +#define SDNXL (1UL << SDNXC) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -217,8 +232,16 @@ struct kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 padding1[52]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ + __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ + union { + __u8 sdnx[SDNXL]; /* state description annex */ + struct { + __u64 reserved1[2]; + __u64 gscb[4]; + }; + }; }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/arch/s390/include/uapi/asm/mman.h b/arch/s390/include/uapi/asm/mman.h deleted file mode 100644 index de23da1f41b2..000000000000 --- a/arch/s390/include/uapi/asm/mman.h +++ /dev/null @@ -1,6 +0,0 @@ -/* - * S390 version - * - * Derived from "include/asm-i386/mman.h" - */ -#include <asm-generic/mman.h> diff --git a/arch/s390/include/uapi/asm/param.h b/arch/s390/include/uapi/asm/param.h deleted file mode 100644 index c616821bf2ac..000000000000 --- a/arch/s390/include/uapi/asm/param.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASMS390_PARAM_H -#define _ASMS390_PARAM_H - -#include <asm-generic/param.h> - -#endif /* _ASMS390_PARAM_H */ diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index ed7f19c27ce5..e6c04faf8a6c 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -109,4 +109,23 @@ struct pkey_skey2pkey { }; #define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey) +/* + * Verify the given secure key for being able to be useable with + * the pkey module. Check for correct key type and check for having at + * least one crypto card being able to handle this key (master key + * or old master key verification pattern matches). + * Return some info about the key: keysize in bits, keytype (currently + * only AES), flag if key is wrapped with an old MKVP. + */ +struct pkey_verifykey { + struct pkey_seckey seckey; /* in: the secure key blob */ + __u16 cardnr; /* out: card number */ + __u16 domain; /* out: domain number */ + __u16 keysize; /* out: key size in bits */ + __u32 attributes; /* out: attribute bits */ +}; +#define PKEY_VERIFYKEY _IOWR(PKEY_IOCTL_MAGIC, 0x07, struct pkey_verifykey) +#define PKEY_VERIFY_ATTR_AES 0x00000001 /* key is an AES key */ +#define PKEY_VERIFY_ATTR_OLD_MKVP 0x00000100 /* key has old MKVP value */ + #endif /* _UAPI_PKEY_H */ diff --git a/arch/s390/include/uapi/asm/poll.h b/arch/s390/include/uapi/asm/poll.h deleted file mode 100644 index c98509d3149e..000000000000 --- a/arch/s390/include/uapi/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/poll.h> diff --git a/arch/s390/include/uapi/asm/resource.h b/arch/s390/include/uapi/asm/resource.h deleted file mode 100644 index ec23d1c73c92..000000000000 --- a/arch/s390/include/uapi/asm/resource.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * S390 version - * - * Derived from "include/asm-i386/resources.h" - */ - -#ifndef _S390_RESOURCE_H -#define _S390_RESOURCE_H - -#include <asm-generic/resource.h> - -#endif - diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index b24a64cbfeb1..e8e5ecf673fd 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -98,4 +98,10 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + +#define SO_INCOMING_NAPI_ID 56 + +#define SO_COOKIE 57 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/sockios.h b/arch/s390/include/uapi/asm/sockios.h deleted file mode 100644 index 6f60eee73242..000000000000 --- a/arch/s390/include/uapi/asm/sockios.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_S390_SOCKIOS_H -#define _ASM_S390_SOCKIOS_H - -#include <asm-generic/sockios.h> - -#endif diff --git a/arch/s390/include/uapi/asm/termbits.h b/arch/s390/include/uapi/asm/termbits.h deleted file mode 100644 index 71bf6ac6a2b9..000000000000 --- a/arch/s390/include/uapi/asm/termbits.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_S390_TERMBITS_H -#define _ASM_S390_TERMBITS_H - -#include <asm-generic/termbits.h> - -#endif diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 4384bc797a54..ea42290e7d51 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -313,7 +313,9 @@ #define __NR_copy_file_range 375 #define __NR_preadv2 376 #define __NR_pwritev2 377 -#define NR_syscalls 378 +#define __NR_s390_guarded_storage 378 +#define __NR_statx 379 +#define NR_syscalls 380 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 060ce548fe8b..adb3fe2e3d42 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -51,14 +51,12 @@ CFLAGS_dumpstack.o += -fno-optimize-sibling-calls # CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -CFLAGS_sysinfo.o += -w - obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o -obj-y += runtime_instr.o cache.o fpu.o dumpstack.o -obj-y += entry.o reipl.o relocate_kernel.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o +obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o extra-y += head.o head64.o vmlinux.lds diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index c4b3570ded5b..6bb29633e1f1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -175,7 +175,7 @@ int main(void) /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ - OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr); + OFFSET(__LC_MCESAD, lowcore, mcesad); OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index ae2cda5eee5a..986642a3543b 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -178,3 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); +COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index dd1d5c62c374..d628afc26708 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -429,6 +429,20 @@ static void *nt_vmcoreinfo(void *ptr) } /* + * Initialize final note (needed for /proc/vmcore code) + */ +static void *nt_final(void *ptr) +{ + Elf64_Nhdr *note; + + note = (Elf64_Nhdr *) ptr; + note->n_namesz = 0; + note->n_descsz = 0; + note->n_type = 0; + return PTR_ADD(ptr, sizeof(Elf64_Nhdr)); +} + +/* * Initialize ELF header (new kernel) */ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) @@ -515,6 +529,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) if (sa->prefix != 0) ptr = fill_cpu_elf_notes(ptr, cpu++, sa); ptr = nt_vmcoreinfo(ptr); + ptr = nt_final(ptr); memset(phdr, 0, sizeof(*phdr)); phdr->p_type = PT_NOTE; phdr->p_offset = notes_offset; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 4e65c79cc5f2..5d20182ee8ae 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -231,9 +231,29 @@ static noinline __init void detect_machine_type(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } +/* Remove leading, trailing and double whitespace. */ +static inline void strim_all(char *str) +{ + char *s; + + s = strim(str); + if (s != str) + memmove(str, s, strlen(s)); + while (*str) { + if (!isspace(*str++)) + continue; + if (isspace(*str)) { + s = skip_spaces(str); + memmove(str, s, strlen(s) + 1); + } + } +} + static noinline __init void setup_arch_string(void) { struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; + struct sysinfo_3_2_2 *vm = (struct sysinfo_3_2_2 *)&sysinfo_page; + char mstr[80], hvstr[17]; if (stsi(mach, 1, 1, 1)) return; @@ -241,14 +261,21 @@ static noinline __init void setup_arch_string(void) EBCASC(mach->type, sizeof(mach->type)); EBCASC(mach->model, sizeof(mach->model)); EBCASC(mach->model_capacity, sizeof(mach->model_capacity)); - dump_stack_set_arch_desc("%-16.16s %-4.4s %-16.16s %-16.16s (%s)", - mach->manufacturer, - mach->type, - mach->model, - mach->model_capacity, - MACHINE_IS_LPAR ? "LPAR" : - MACHINE_IS_VM ? "z/VM" : - MACHINE_IS_KVM ? "KVM" : "unknown"); + sprintf(mstr, "%-16.16s %-4.4s %-16.16s %-16.16s", + mach->manufacturer, mach->type, + mach->model, mach->model_capacity); + strim_all(mstr); + if (stsi(vm, 3, 2, 2) == 0 && vm->count) { + EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi)); + sprintf(hvstr, "%-16.16s", vm->vm[0].cpi); + strim_all(hvstr); + } else { + sprintf(hvstr, "%s", + MACHINE_IS_LPAR ? "LPAR" : + MACHINE_IS_VM ? "z/VM" : + MACHINE_IS_KVM ? "KVM" : "unknown"); + } + dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); } static __init void setup_topology(void) @@ -358,6 +385,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_NX; __ctl_set_bit(0, 20); } + if (test_facility(133)) + S390_lowcore.machine_flags |= MACHINE_FLAG_GS; } static inline void save_vector_registers(void) @@ -375,7 +404,7 @@ static int __init topology_setup(char *str) rc = kstrtobool(str, &enabled); if (!rc && !enabled) - S390_lowcore.machine_flags &= ~MACHINE_HAS_TOPOLOGY; + S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY; return rc; } early_param("topology", topology_setup); @@ -405,23 +434,16 @@ early_param("noexec", noexec_setup); static int __init cad_setup(char *str) { - int val; - - get_option(&str, &val); - if (val && test_facility(128)) - S390_lowcore.machine_flags |= MACHINE_FLAG_CAD; - return 0; -} -early_param("cad", cad_setup); + bool enabled; + int rc; -static int __init cad_init(void) -{ - if (MACHINE_HAS_CAD) + rc = kstrtobool(str, &enabled); + if (!rc && enabled && test_facility(128)) /* Enable problem state CAD. */ __ctl_set_bit(2, 3); - return 0; + return rc; } -early_initcall(cad_init); +early_param("cad", cad_setup); static __init void memmove_early(void *dst, const void *src, size_t n) { diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index dff2152350a7..a5f5d3bb3dbc 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_UPROBE) + _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ @@ -189,8 +189,6 @@ ENTRY(__switch_to) stg %r3,__LC_CURRENT # store task struct of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack lg %r15,__THREAD_ksp(%r1) # load kernel stack of next - /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */ - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP @@ -332,8 +330,15 @@ ENTRY(system_call) TSTMSK __TI_flags(%r12),_TIF_UPROBE jo .Lsysc_uprobe_notify #endif + TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE + jo .Lsysc_guarded_storage TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP jo .Lsysc_singlestep +#ifdef CONFIG_LIVEPATCH + TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING + jo .Lsysc_patch_pending # handle live patching just before + # signals and possible syscall restart +#endif TSTMSK __TI_flags(%r12),_TIF_SIGPENDING jo .Lsysc_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME @@ -409,6 +414,23 @@ ENTRY(system_call) #endif # +# _TIF_GUARDED_STORAGE is set, call guarded_storage_load +# +.Lsysc_guarded_storage: + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,.Lsysc_return + jg gs_load_bc_cb +# +# _TIF_PATCH_PENDING is set, call klp_update_patch_state +# +#ifdef CONFIG_LIVEPATCH +.Lsysc_patch_pending: + lg %r2,__LC_CURRENT # pass pointer to task struct + larl %r14,.Lsysc_return + jg klp_update_patch_state +#endif + +# # _PIF_PER_TRAP is set, call do_per_trap # .Lsysc_singlestep: @@ -490,7 +512,7 @@ ENTRY(pgm_check_handler) jnz .Lpgm_svcper # -> single stepped svc 1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 3f + j 4f 2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER lg %r15,__LC_KERNEL_STACK lgr %r14,%r12 @@ -499,8 +521,8 @@ ENTRY(pgm_check_handler) tm __LC_PGM_ILC+2,0x02 # check for transaction abort jz 3f mvc __THREAD_trap_tdb(256,%r14),0(%r13) -3: la %r11,STACK_FRAME_OVERHEAD(%r15) - stg %r10,__THREAD_last_break(%r14) +3: stg %r10,__THREAD_last_break(%r14) +4: la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC stmg %r8,%r9,__PT_PSW(%r11) @@ -509,14 +531,14 @@ ENTRY(pgm_check_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 4f + jz 5f tmhh %r8,0x0001 # kernel per event ? jz .Lpgm_kprobe oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -4: REENABLE_IRQS +5: REENABLE_IRQS xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) @@ -659,10 +681,16 @@ ENTRY(io_int_handler) jo .Lio_mcck_pending TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jo .Lio_reschedule +#ifdef CONFIG_LIVEPATCH + TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING + jo .Lio_patch_pending +#endif TSTMSK __TI_flags(%r12),_TIF_SIGPENDING jo .Lio_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lio_notify_resume + TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE + jo .Lio_guarded_storage TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lio_vxrs TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) @@ -697,6 +725,18 @@ ENTRY(io_int_handler) jg load_fpu_regs # +# _TIF_GUARDED_STORAGE is set, call guarded_storage_load +# +.Lio_guarded_storage: + # TRACE_IRQS_ON already done at .Lio_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,gs_load_bc_cb + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j .Lio_return + +# # _TIF_NEED_RESCHED is set, call schedule # .Lio_reschedule: @@ -708,6 +748,16 @@ ENTRY(io_int_handler) j .Lio_return # +# _TIF_PATCH_PENDING is set, call klp_update_patch_state +# +#ifdef CONFIG_LIVEPATCH +.Lio_patch_pending: + lg %r2,__LC_CURRENT # pass pointer to task struct + larl %r14,.Lio_return + jg klp_update_patch_state +#endif + +# # _TIF_SIGPENDING or is set, call do_signal # .Lio_sigpending: diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 33f901865326..dbf5f7e18246 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -74,12 +74,14 @@ long sys_sigreturn(void); long sys_s390_personality(unsigned int personality); long sys_s390_runtime_instr(int command, int signum); +long sys_s390_guarded_storage(int command, struct gs_cb __user *); long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); DECLARE_PER_CPU(u64, mt_cycles[8]); void verify_facilities(void); +void gs_load_bc_cb(struct pt_regs *regs); void set_fs_fixup(void); #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 60a8a4e207ed..27477f34cc0a 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -17,6 +17,7 @@ #include <trace/syscall.h> #include <asm/asm-offsets.h> #include <asm/cacheflush.h> +#include <asm/set_memory.h> #include "entry.h" /* diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c new file mode 100644 index 000000000000..6f064745c3b1 --- /dev/null +++ b/arch/s390/kernel/guarded_storage.c @@ -0,0 +1,128 @@ +/* + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/syscalls.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <asm/guarded_storage.h> +#include "entry.h" + +void exit_thread_gs(void) +{ + kfree(current->thread.gs_cb); + kfree(current->thread.gs_bc_cb); + current->thread.gs_cb = current->thread.gs_bc_cb = NULL; +} + +static int gs_enable(void) +{ + struct gs_cb *gs_cb; + + if (!current->thread.gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + gs_cb->gsd = 25; + preempt_disable(); + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + preempt_enable(); + } + return 0; +} + +static int gs_disable(void) +{ + if (current->thread.gs_cb) { + preempt_disable(); + kfree(current->thread.gs_cb); + current->thread.gs_cb = NULL; + __ctl_clear_bit(2, 4); + preempt_enable(); + } + return 0; +} + +static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + if (!gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + current->thread.gs_bc_cb = gs_cb; + } + if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb))) + return -EFAULT; + return 0; +} + +static int gs_clear_bc_cb(void) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + current->thread.gs_bc_cb = NULL; + kfree(gs_cb); + return 0; +} + +void gs_load_bc_cb(struct pt_regs *regs) +{ + struct gs_cb *gs_cb; + + preempt_disable(); + clear_thread_flag(TIF_GUARDED_STORAGE); + gs_cb = current->thread.gs_bc_cb; + if (gs_cb) { + kfree(current->thread.gs_cb); + current->thread.gs_bc_cb = NULL; + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + } + preempt_enable(); +} + +static int gs_broadcast(void) +{ + struct task_struct *sibling; + + read_lock(&tasklist_lock); + for_each_thread(current, sibling) { + if (!sibling->thread.gs_bc_cb) + continue; + if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE)) + kick_process(sibling); + } + read_unlock(&tasklist_lock); + return 0; +} + +SYSCALL_DEFINE2(s390_guarded_storage, int, command, + struct gs_cb __user *, gs_cb) +{ + if (!MACHINE_HAS_GS) + return -EOPNOTSUPP; + switch (command) { + case GS_ENABLE: + return gs_enable(); + case GS_DISABLE: + return gs_disable(); + case GS_SET_BC_CB: + return gs_set_bc_cb(gs_cb); + case GS_CLEAR_BC_CB: + return gs_clear_bc_cb(); + case GS_BROADCAST: + return gs_broadcast(); + default: + return -EINVAL; + } +} diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 0b5ebf8a3d30..eff5b31671d4 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -25,7 +25,6 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> -#include <asm/facility.h> #include <asm/page.h> #include <asm/ptrace.h> diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 482d3526e32b..31c91f24e562 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -52,7 +52,7 @@ ENTRY(startup_continue) .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table .quad 0 # cr3: instruction authorization - .quad 0 # cr4: instruction authorization + .quad 0xffff # cr4: instruction authorization .quad .Lduct # cr5: primary-aste origin .quad 0 # cr6: I/O interrupts .quad 0 # cr7: secondary space segment table diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index b67dafb7b7cf..e545ffe5155a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -564,6 +564,8 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { + if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW) + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); diff --git a/arch/s390/kernel/kdebugfs.c b/arch/s390/kernel/kdebugfs.c new file mode 100644 index 000000000000..ee85e17dd79d --- /dev/null +++ b/arch/s390/kernel/kdebugfs.c @@ -0,0 +1,15 @@ +#include <linux/debugfs.h> +#include <linux/export.h> +#include <linux/init.h> + +struct dentry *arch_debugfs_dir; +EXPORT_SYMBOL(arch_debugfs_dir); + +static int __init arch_kdebugfs_init(void) +{ + arch_debugfs_dir = debugfs_create_dir("s390", NULL); + if (IS_ERR(arch_debugfs_dir)) + arch_debugfs_dir = NULL; + return 0; +} +postcore_initcall(arch_kdebugfs_init); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 76f9eda1d7c0..3d6a99746454 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -31,7 +31,7 @@ #include <linux/slab.h> #include <linux/hardirq.h> #include <linux/ftrace.h> -#include <asm/cacheflush.h> +#include <asm/set_memory.h> #include <asm/sections.h> #include <linux/uaccess.h> #include <asm/dis.h> diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 3074c1d83829..49a6bd45957b 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -26,7 +26,9 @@ #include <asm/asm-offsets.h> #include <asm/cacheflush.h> #include <asm/os_info.h> +#include <asm/set_memory.h> #include <asm/switch_to.h> +#include <asm/nmi.h> typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); @@ -102,6 +104,8 @@ static void __do_machine_kdump(void *image) */ static noinline void __machine_kdump(void *image) { + struct mcesa *mcesa; + unsigned long cr2_old, cr2_new; int this_cpu, cpu; lgr_info_log(); @@ -114,8 +118,16 @@ static noinline void __machine_kdump(void *image) continue; } /* Store status of the boot CPU */ + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); if (MACHINE_HAS_VX) - save_vx_regs((void *) &S390_lowcore.vector_save_area); + save_vx_regs((__vector128 *) mcesa->vector_save_area); + if (MACHINE_HAS_GS) { + __ctl_store(cr2_old, 2, 2); + cr2_new = cr2_old | (1UL << 4); + __ctl_load(cr2_new, 2, 2); + save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area); + __ctl_load(cr2_old, 2, 2); + } /* * To create a good backchain for this CPU in the dump store_status * is passed the address of a function. The address is saved into diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 9bf8327154ee..985589523970 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) int kill_task; u64 zero; void *fpt_save_area; + struct mcesa *mcesa; kill_task = 0; zero = 0; @@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) : : "Q" (S390_lowcore.fpt_creg_save_area)); } + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); if (!MACHINE_HAS_VX) { /* Validate floating point registers */ asm volatile( @@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode) " la 1,%0\n" " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ - : : "Q" (*(struct vx_array *) - &S390_lowcore.vector_save_area) : "1"); + : : "Q" (*(struct vx_array *) mcesa->vector_save_area) + : "1"); __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } /* Validate access registers */ @@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode) */ kill_task = 1; } + /* Validate guarded storage registers */ + if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) { + if (!mci.gs) + /* + * Guarded storage register can't be restored and + * the current processes uses guarded storage. + * It has to be terminated. + */ + kill_task = 1; + else + load_gs_cb((struct gs_cb *) + mcesa->guarded_storage_save_area); + } /* * We don't even try to validate the TOD register, since we simply * can't write something sensible into that register. diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 1aba10e90906..746d03423333 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1,7 +1,7 @@ /* * Performance event support for s390x - CPU-measurement Counter Facility * - * Copyright IBM Corp. 2012 + * Copyright IBM Corp. 2012, 2017 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> * * This program is free software; you can redistribute it and/or modify @@ -22,19 +22,12 @@ #include <asm/irq.h> #include <asm/cpu_mf.h> -/* CPU-measurement counter facility supports these CPU counter sets: - * For CPU counter sets: - * Basic counter set: 0-31 - * Problem-state counter set: 32-63 - * Crypto-activity counter set: 64-127 - * Extented counter set: 128-159 - */ enum cpumf_ctr_set { - /* CPU counter sets */ - CPUMF_CTR_SET_BASIC = 0, - CPUMF_CTR_SET_USER = 1, - CPUMF_CTR_SET_CRYPTO = 2, - CPUMF_CTR_SET_EXT = 3, + CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */ + CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */ + CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */ + CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */ + CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */ /* Maximum number of counter sets */ CPUMF_CTR_SET_MAX, @@ -47,6 +40,7 @@ static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = { [CPUMF_CTR_SET_USER] = 0x04, [CPUMF_CTR_SET_CRYPTO] = 0x08, [CPUMF_CTR_SET_EXT] = 0x01, + [CPUMF_CTR_SET_MT_DIAG] = 0x20, }; static void ctr_set_enable(u64 *state, int ctr_set) @@ -76,19 +70,20 @@ struct cpu_hw_events { }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .ctr_set = { - [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0), }, .state = 0, .flags = 0, .txn_flags = 0, }; -static int get_counter_set(u64 event) +static enum cpumf_ctr_set get_counter_set(u64 event) { - int set = -1; + int set = CPUMF_CTR_SET_MAX; if (event < 32) set = CPUMF_CTR_SET_BASIC; @@ -98,34 +93,17 @@ static int get_counter_set(u64 event) set = CPUMF_CTR_SET_CRYPTO; else if (event < 256) set = CPUMF_CTR_SET_EXT; + else if (event >= 448 && event < 496) + set = CPUMF_CTR_SET_MT_DIAG; return set; } -static int validate_event(const struct hw_perf_event *hwc) -{ - switch (hwc->config_base) { - case CPUMF_CTR_SET_BASIC: - case CPUMF_CTR_SET_USER: - case CPUMF_CTR_SET_CRYPTO: - case CPUMF_CTR_SET_EXT: - /* check for reserved counters */ - if ((hwc->config >= 6 && hwc->config <= 31) || - (hwc->config >= 38 && hwc->config <= 63) || - (hwc->config >= 80 && hwc->config <= 127)) - return -EOPNOTSUPP; - break; - default: - return -EINVAL; - } - - return 0; -} - static int validate_ctr_version(const struct hw_perf_event *hwc) { struct cpu_hw_events *cpuhw; int err = 0; + u16 mtdiag_ctl; cpuhw = &get_cpu_var(cpu_hw_events); @@ -145,6 +123,27 @@ static int validate_ctr_version(const struct hw_perf_event *hwc) (cpuhw->info.csvn > 2 && hwc->config > 255)) err = -EOPNOTSUPP; break; + case CPUMF_CTR_SET_MT_DIAG: + if (cpuhw->info.csvn <= 3) + err = -EOPNOTSUPP; + /* + * MT-diagnostic counters are read-only. The counter set + * is automatically enabled and activated on all CPUs with + * multithreading (SMT). Deactivation of multithreading + * also disables the counter set. State changes are ignored + * by lcctl(). Because Linux controls SMT enablement through + * a kernel parameter only, the counter set is either disabled + * or enabled and active. + * + * Thus, the counters can only be used if SMT is on and the + * counter set is enabled and active. + */ + mtdiag_ctl = cpumf_state_ctl[CPUMF_CTR_SET_MT_DIAG]; + if (!((cpuhw->info.auth_ctl & mtdiag_ctl) && + (cpuhw->info.enable_ctl & mtdiag_ctl) && + (cpuhw->info.act_ctl & mtdiag_ctl))) + err = -EOPNOTSUPP; + break; } put_cpu_var(cpu_hw_events); @@ -250,6 +249,11 @@ static void cpumf_measurement_alert(struct ext_code ext_code, /* loss of counter data alert */ if (alert & CPU_MF_INT_CF_LCDA) pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); + + /* loss of MT counter data alert */ + if (alert & CPU_MF_INT_CF_MTDA) + pr_warn("CPU[%i] MT counter data was lost\n", + smp_processor_id()); } #define PMC_INIT 0 @@ -330,6 +334,7 @@ static int __hw_perf_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; + enum cpumf_ctr_set set; int err; u64 ev; @@ -370,25 +375,30 @@ static int __hw_perf_event_init(struct perf_event *event) if (ev == -1) return -ENOENT; - if (ev >= PERF_CPUM_CF_MAX_CTR) + if (ev > PERF_CPUM_CF_MAX_CTR) return -EINVAL; - /* Use the hardware perf event structure to store the counter number - * in 'config' member and the counter set to which the counter belongs - * in the 'config_base'. The counter set (config_base) is then used - * to enable/disable the counters. - */ - hwc->config = ev; - hwc->config_base = get_counter_set(ev); - - /* Validate the counter that is assigned to this event. - * Because the counter facility can use numerous counters at the - * same time without constraints, it is not necessary to explicitly - * validate event groups (event->group_leader != event). - */ - err = validate_event(hwc); - if (err) - return err; + /* Obtain the counter set to which the specified counter belongs */ + set = get_counter_set(ev); + switch (set) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + case CPUMF_CTR_SET_MT_DIAG: + /* + * Use the hardware perf event structure to store the + * counter number in the 'config' member and the counter + * set number in the 'config_base'. The counter set number + * is then later used to enable/disable the counter(s). + */ + hwc->config = ev; + hwc->config_base = set; + break; + case CPUMF_CTR_SET_MAX: + /* The counter could not be associated to a counter set */ + return -EINVAL; + }; /* Initialize for using the CPU-measurement counter facility */ if (!atomic_inc_not_zero(&num_events)) { @@ -452,7 +462,7 @@ static int hw_perf_event_reset(struct perf_event *event) return err; } -static int hw_perf_event_update(struct perf_event *event) +static void hw_perf_event_update(struct perf_event *event) { u64 prev, new, delta; int err; @@ -461,14 +471,12 @@ static int hw_perf_event_update(struct perf_event *event) prev = local64_read(&event->hw.prev_count); err = ecctr(event->hw.config, &new); if (err) - goto out; + return; } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); delta = (prev <= new) ? new - prev : (-1ULL - prev) + new + 1; /* overflow */ local64_add(delta, &event->count); -out: - return err; } static void cpumf_pmu_read(struct perf_event *event) diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index c343ac2cf6c5..d3133285b7d1 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -114,8 +114,64 @@ CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1); CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1); CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2); CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3); +CPUMF_EVENT_ATTR(cf_z13, L1D_WRITES_RO_EXCL, 0x0080); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z13, L1D_L2D_SOURCED_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z13, ITLB1_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z13, ITLB1_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z13, L1I_L2I_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z13, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z13, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z13, L1C_TLB1_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0091); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV, 0x0093); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x0095); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES, 0x0097); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x0098); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x0099); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x009c); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES, 0x009e); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES, 0x009f); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES, 0x00a0); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES, 0x00a1); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES, 0x00a4); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV, 0x00a5); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES, 0x00a6); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00a7); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV, 0x00a8); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES, 0x00a9); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x00aa); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x00ab); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x00ac); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x00ad); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x00af); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES, 0x00b0); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES, 0x00b1); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES, 0x00b2); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES, 0x00b3); +CPUMF_EVENT_ATTR(cf_z13, TX_NC_TABORT, 0x00da); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc); +CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); -static struct attribute *cpumcf_pmu_event_attr[] = { +static struct attribute *cpumcf_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf, CPU_CYCLES), CPUMF_EVENT_PTR(cf, INSTRUCTIONS), CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES), @@ -236,28 +292,87 @@ static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z13, L1D_WRITES_RO_EXCL), + CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z13, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z13, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z13, L1C_TLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ -static struct attribute_group cpumsf_pmu_events_group = { +static struct attribute_group cpumcf_pmu_events_group = { .name = "events", - .attrs = cpumcf_pmu_event_attr, }; PMU_FORMAT_ATTR(event, "config:0-63"); -static struct attribute *cpumsf_pmu_format_attr[] = { +static struct attribute *cpumcf_pmu_format_attr[] = { &format_attr_event.attr, NULL, }; -static struct attribute_group cpumsf_pmu_format_group = { +static struct attribute_group cpumcf_pmu_format_group = { .name = "format", - .attrs = cpumsf_pmu_format_attr, + .attrs = cpumcf_pmu_format_attr, }; -static const struct attribute_group *cpumsf_pmu_attr_groups[] = { - &cpumsf_pmu_events_group, - &cpumsf_pmu_format_group, +static const struct attribute_group *cpumcf_pmu_attr_groups[] = { + &cpumcf_pmu_events_group, + &cpumcf_pmu_format_group, NULL, }; @@ -290,6 +405,7 @@ static __init struct attribute **merge_attr(struct attribute **a, __init const struct attribute_group **cpumf_cf_event_group(void) { struct attribute **combined, **model; + struct attribute *none[] = { NULL }; struct cpuid cpu_id; get_cpu_id(&cpu_id); @@ -306,17 +422,17 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 0x2828: model = cpumcf_zec12_pmu_event_attr; break; + case 0x2964: + case 0x2965: + model = cpumcf_z13_pmu_event_attr; + break; default: - model = NULL; + model = none; break; } - if (!model) - goto out; - combined = merge_attr(cpumcf_pmu_event_attr, model); if (combined) - cpumsf_pmu_events_group.attrs = combined; -out: - return cpumsf_pmu_attr_groups; + cpumcf_pmu_events_group.attrs = combined; + return cpumcf_pmu_attr_groups; } diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 1c0b58545c04..ca960d0370d5 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -823,7 +823,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event) } /* Check online status of the CPU to which the event is pinned */ - if (event->cpu >= nr_cpumask_bits || + if ((unsigned int)event->cpu >= nr_cpumask_bits || (event->cpu >= 0 && !cpu_online(event->cpu))) return -ENODEV; @@ -1009,8 +1009,8 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) * sample. Some early samples or samples from guests without * lpp usage would be misaccounted to the host. We use the asn * value as an addon heuristic to detect most of these guest samples. - * If the value differs from the host hpp value, we assume to be a - * KVM guest. + * If the value differs from 0xffff (the host value), we assume to + * be a KVM guest. */ switch (sfr->basic.CL) { case 1: /* logical partition */ @@ -1020,8 +1020,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) sde_regs->in_guest = 1; break; default: /* old machine, use heuristics */ - if (sfr->basic.gpp || - sfr->basic.prim_asn != (u16)sfr->basic.hpp) + if (sfr->basic.gpp || sfr->basic.prim_asn != 0xffff) sde_regs->in_guest = 1; break; } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 20cd339e11ae..999d7154bbdc 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -73,8 +73,10 @@ extern void kernel_thread_starter(void); */ void exit_thread(struct task_struct *tsk) { - if (tsk == current) + if (tsk == current) { exit_thread_runtime_instr(); + exit_thread_gs(); + } } void flush_thread(void) @@ -124,7 +126,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, clear_tsk_thread_flag(p, TIF_SINGLE_STEP); /* Initialize per thread user and system timer values */ p->thread.user_timer = 0; + p->thread.guest_timer = 0; p->thread.system_timer = 0; + p->thread.hardirq_timer = 0; + p->thread.softirq_timer = 0; frame->sf.back_chain = 0; /* new return point is ret_from_fork */ @@ -156,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, /* Don't copy runtime instrumentation info */ p->thread.ri_cb = NULL; frame->childregs.psw.mask &= ~PSW_MASK_RI; + /* Don't copy guarded storage control block */ + p->thread.gs_cb = NULL; + p->thread.gs_bc_cb = NULL; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 928b929a6261..778cd6536175 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/cpufeature.h> +#include <linux/bitops.h> #include <linux/kernel.h> #include <linux/sched/mm.h> #include <linux/init.h> @@ -91,11 +92,23 @@ int cpu_have_feature(unsigned int num) } EXPORT_SYMBOL(cpu_have_feature); +static void show_facilities(struct seq_file *m) +{ + unsigned int bit; + long *facilities; + + facilities = (long *)&S390_lowcore.stfle_fac_list; + seq_puts(m, "facilities :"); + for_each_set_bit_inv(bit, facilities, MAX_FACILITY_BIT) + seq_printf(m, " %d", bit); + seq_putc(m, '\n'); +} + static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe" + "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs" }; static const char * const int_hwcap_str[] = { "sie" @@ -116,6 +129,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) seq_printf(m, "%s ", int_hwcap_str[i]); seq_puts(m, "\n"); + show_facilities(m); show_cacheinfo(m); for_each_online_cpu(cpu) { struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c14df0a1ec3c..488c5bb8dc77 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task) struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; struct per_regs old, new; - + unsigned long cr0_old, cr0_new; + unsigned long cr2_old, cr2_new; + int cr0_changed, cr2_changed; + + __ctl_store(cr0_old, 0, 0); + __ctl_store(cr2_old, 2, 2); + cr0_new = cr0_old; + cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. */ if (MACHINE_HAS_TE) { - unsigned long cr, cr_new; - - __ctl_store(cr, 0, 0); /* Set or clear transaction execution TXC bit 8. */ - cr_new = cr | (1UL << 55); + cr0_new |= (1UL << 55); if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new &= ~(1UL << 55); - if (cr_new != cr) - __ctl_load(cr_new, 0, 0); + cr0_new &= ~(1UL << 55); /* Set or clear transaction execution TDC bits 62 and 63. */ - __ctl_store(cr, 2, 2); - cr_new = cr & ~3UL; + cr2_new &= ~3UL; if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) - cr_new |= 1UL; + cr2_new |= 1UL; else - cr_new |= 2UL; + cr2_new |= 2UL; } - if (cr_new != cr) - __ctl_load(cr_new, 2, 2); } + /* Take care of enable/disable of guarded storage. */ + if (MACHINE_HAS_GS) { + cr2_new &= ~(1UL << 4); + if (task->thread.gs_cb) + cr2_new |= (1UL << 4); + } + /* Load control register 0/2 iff changed */ + cr0_changed = cr0_new != cr0_old; + cr2_changed = cr2_new != cr2_old; + if (cr0_changed) + __ctl_load(cr0_new, 0, 0); + if (cr2_changed) + __ctl_load(cr2_new, 2, 2); /* Copy user specified PER registers */ new.control = thread->per_user.control; new.start = thread->per_user.start; @@ -1137,6 +1149,74 @@ static int s390_system_call_set(struct task_struct *target, data, 0, sizeof(unsigned int)); } +static int s390_gs_cb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_cb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + target->thread.gs_cb = data; + } + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_bc_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_bc_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_bc_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_bc_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + target->thread.gs_bc_cb = data; + } + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + static const struct user_regset s390_regsets[] = { { .core_note_type = NT_PRSTATUS, @@ -1194,6 +1274,22 @@ static const struct user_regset s390_regsets[] = { .get = s390_vxrs_high_get, .set = s390_vxrs_high_set, }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, + { + .core_note_type = NT_S390_GS_BC, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_bc_get, + .set = s390_gs_bc_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1422,6 +1518,14 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_regs_high_get, .set = s390_compat_regs_high_set, }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, }; static const struct user_regset_view user_s390_compat_view = { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 911dc0b49be0..3ae756c0db3d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -339,9 +339,15 @@ static void __init setup_lowcore(void) lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, MAX_FACILITY_BIT/8); - if (MACHINE_HAS_VX) - lc->vector_save_area_addr = - (unsigned long) &lc->vector_save_area; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + unsigned long bits, size; + + bits = MACHINE_HAS_GS ? 11 : 10; + size = 1UL << bits; + lc->mcesad = (__u64) memblock_virt_alloc(size, size); + if (MACHINE_HAS_GS) + lc->mcesad |= bits; + } lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; lc->sync_enter_timer = S390_lowcore.sync_enter_timer; lc->async_enter_timer = S390_lowcore.async_enter_timer; @@ -779,6 +785,12 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_S390_VXRS_BCD; } + /* + * Guarded storage support HWCAP_S390_GS is bit 12. + */ + if (MACHINE_HAS_GS) + elf_hwcap |= HWCAP_S390_GS; + get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 47a973b5b4f1..363000a77ffc 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -51,6 +51,7 @@ #include <asm/os_info.h> #include <asm/sigp.h> #include <asm/idle.h> +#include <asm/nmi.h> #include "entry.h" enum { @@ -78,6 +79,8 @@ struct pcpu { static u8 boot_core_type; static struct pcpu pcpu_devices[NR_CPUS]; +static struct kmem_cache *pcpu_mcesa_cache; + unsigned int smp_cpu_mt_shift; EXPORT_SYMBOL(smp_cpu_mt_shift); @@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, panic_stack; + unsigned long mcesa_origin, mcesa_bits; struct lowcore *lc; + mcesa_origin = mcesa_bits = 0; if (pcpu != &pcpu_devices[0]) { pcpu->lowcore = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); @@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) panic_stack = __get_free_page(GFP_KERNEL); if (!pcpu->lowcore || !panic_stack || !async_stack) goto out; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + mcesa_origin = (unsigned long) + kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL); + if (!mcesa_origin) + goto out; + mcesa_bits = MACHINE_HAS_GS ? 11 : 0; + } } else { async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; + mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; + mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK; } lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; + lc->mcesad = mcesa_origin | mcesa_bits; lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); - if (MACHINE_HAS_VX) - lc->vector_save_area_addr = - (unsigned long) &lc->vector_save_area; if (vdso_alloc_per_cpu(lc)) goto out; lowcore_ptr[cpu] = lc; @@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) return 0; out: if (pcpu != &pcpu_devices[0]) { + if (mcesa_origin) + kmem_cache_free(pcpu_mcesa_cache, + (void *) mcesa_origin); free_page(panic_stack); free_pages(async_stack, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -229,11 +244,17 @@ out: static void pcpu_free_lowcore(struct pcpu *pcpu) { + unsigned long mcesa_origin; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; vdso_free_per_cpu(pcpu->lowcore); if (pcpu == &pcpu_devices[0]) return; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; + kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin); + } free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -550,9 +571,11 @@ int smp_store_status(int cpu) if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - if (!MACHINE_HAS_VX) + if (!MACHINE_HAS_VX && !MACHINE_HAS_GS) return 0; - pa = __pa(pcpu->lowcore->vector_save_area_addr); + pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK); + if (MACHINE_HAS_GS) + pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; @@ -897,25 +920,33 @@ void __init smp_fill_possible_mask(void) void __init smp_prepare_cpus(unsigned int max_cpus) { + unsigned long size; + /* request the 0x1201 emergency signal external interrupt */ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1201"); /* request the 0x1202 external call external interrupt */ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1202"); + /* create slab cache for the machine-check-extended-save-areas */ + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + size = 1UL << (MACHINE_HAS_GS ? 11 : 10); + pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas", + size, size, 0, NULL); + if (!pcpu_mcesa_cache) + panic("Couldn't create nmi save area cache"); + } } void __init smp_prepare_boot_cpu(void) { struct pcpu *pcpu = pcpu_devices; + WARN_ON(!cpu_present(0) || !cpu_online(0)); pcpu->state = CPU_STATE_CONFIGURED; - pcpu->address = stap(); pcpu->lowcore = (struct lowcore *)(unsigned long) store_prefix(); S390_lowcore.percpu_offset = __per_cpu_offset[0]; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); - set_cpu_present(0, true); - set_cpu_online(0, true); } void __init smp_cpus_done(unsigned int max_cpus) @@ -924,6 +955,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { + pcpu_devices[0].address = stap(); S390_lowcore.cpu_nr = 0; S390_lowcore.spinlock_lockval = arch_spin_lockval(0); } diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9b59e6212d8f..54fce7b065de 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -386,3 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2) SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ SYSCALL(sys_preadv2,compat_sys_preadv2) SYSCALL(sys_pwritev2,compat_sys_pwritev2) +SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */ +SYSCALL(sys_statx,compat_sys_statx) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 12b6b138e354..eefcb54872a5 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -4,6 +4,7 @@ * Martin Schwidefsky <schwidefsky@de.ibm.com>, */ +#include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/proc_fs.h> @@ -13,6 +14,7 @@ #include <linux/export.h> #include <linux/slab.h> #include <asm/ebcdic.h> +#include <asm/debug.h> #include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/topology.h> @@ -485,3 +487,99 @@ void calibrate_delay(void) "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); } + +#ifdef CONFIG_DEBUG_FS + +#define STSI_FILE(fc, s1, s2) \ +static int stsi_open_##fc##_##s1##_##s2(struct inode *inode, struct file *file)\ +{ \ + file->private_data = (void *) get_zeroed_page(GFP_KERNEL); \ + if (!file->private_data) \ + return -ENOMEM; \ + if (stsi(file->private_data, fc, s1, s2)) { \ + free_page((unsigned long)file->private_data); \ + file->private_data = NULL; \ + return -EACCES; \ + } \ + return nonseekable_open(inode, file); \ +} \ + \ +static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \ + .open = stsi_open_##fc##_##s1##_##s2, \ + .release = stsi_release, \ + .read = stsi_read, \ + .llseek = no_llseek, \ +}; + +static int stsi_release(struct inode *inode, struct file *file) +{ + free_page((unsigned long)file->private_data); + return 0; +} + +static ssize_t stsi_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) +{ + return simple_read_from_buffer(buf, size, ppos, file->private_data, PAGE_SIZE); +} + +STSI_FILE( 1, 1, 1); +STSI_FILE( 1, 2, 1); +STSI_FILE( 1, 2, 2); +STSI_FILE( 2, 2, 1); +STSI_FILE( 2, 2, 2); +STSI_FILE( 3, 2, 2); +STSI_FILE(15, 1, 2); +STSI_FILE(15, 1, 3); +STSI_FILE(15, 1, 4); +STSI_FILE(15, 1, 5); +STSI_FILE(15, 1, 6); + +struct stsi_file { + const struct file_operations *fops; + char *name; +}; + +static struct stsi_file stsi_file[] __initdata = { + {.fops = &stsi_1_1_1_fs_ops, .name = "1_1_1"}, + {.fops = &stsi_1_2_1_fs_ops, .name = "1_2_1"}, + {.fops = &stsi_1_2_2_fs_ops, .name = "1_2_2"}, + {.fops = &stsi_2_2_1_fs_ops, .name = "2_2_1"}, + {.fops = &stsi_2_2_2_fs_ops, .name = "2_2_2"}, + {.fops = &stsi_3_2_2_fs_ops, .name = "3_2_2"}, + {.fops = &stsi_15_1_2_fs_ops, .name = "15_1_2"}, + {.fops = &stsi_15_1_3_fs_ops, .name = "15_1_3"}, + {.fops = &stsi_15_1_4_fs_ops, .name = "15_1_4"}, + {.fops = &stsi_15_1_5_fs_ops, .name = "15_1_5"}, + {.fops = &stsi_15_1_6_fs_ops, .name = "15_1_6"}, +}; + +static u8 stsi_0_0_0; + +static __init int stsi_init_debugfs(void) +{ + struct dentry *stsi_root; + struct stsi_file *sf; + int lvl, i; + + stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir); + if (IS_ERR_OR_NULL(stsi_root)) + return 0; + lvl = stsi(NULL, 0, 0, 0); + if (lvl > 0) + stsi_0_0_0 = lvl; + debugfs_create_u8("0_0_0", 0400, stsi_root, &stsi_0_0_0); + for (i = 0; i < ARRAY_SIZE(stsi_file); i++) { + sf = &stsi_file[i]; + debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops); + } + if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) { + char link_to[10]; + + sprintf(link_to, "15_1_%d", topology_mnest_limit()); + debugfs_create_symlink("topology", stsi_root, link_to); + } + return 0; +} +device_initcall(stsi_init_debugfs); + +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index c31da46bc037..c3a52f9a69a0 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -158,7 +158,9 @@ void init_cpu_timer(void) cd->mult = 16777; cd->shift = 12; cd->min_delta_ns = 1; + cd->min_delta_ticks = 1; cd->max_delta_ns = LONG_MAX; + cd->max_delta_ticks = ULONG_MAX; cd->rating = 400; cd->cpumask = cpumask_of(cpu); cd->set_next_event = s390_next_event; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 17660e800e74..bb47c92476f0 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -83,6 +83,8 @@ static cpumask_t cpu_thread_map(unsigned int cpu) return mask; } +#define TOPOLOGY_CORE_BITS 64 + static void add_cpus_to_mask(struct topology_core *tl_core, struct mask_info *drawer, struct mask_info *book, @@ -91,7 +93,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core, struct cpu_topology_s390 *topo; unsigned int core; - for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) { + for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) { unsigned int rcore; int lcpu, i; @@ -244,7 +246,7 @@ static void update_cpu_masks(void) void store_topology(struct sysinfo_15_1_x *info) { - stsi(info, 15, 1, min(topology_max_mnest, 4)); + stsi(info, 15, 1, topology_mnest_limit()); } static int __arch_update_cpu_topology(void) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 5ccf95396251..72307f108c40 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -63,11 +63,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); __start_ro_after_init = .; - __start_data_ro_after_init = .; .data..ro_after_init : { *(.data..ro_after_init) } - __end_data_ro_after_init = .; EXCEPTION_TABLE(16) . = ALIGN(PAGE_SIZE); __end_ro_after_init = .; diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index c14fc9029912..072d84ba42a3 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -111,7 +111,7 @@ static inline u64 scale_vtime(u64 vtime) } static void account_system_index_scaled(struct task_struct *p, - cputime_t cputime, cputime_t scaled, + u64 cputime, u64 scaled, enum cpu_usage_stat index) { p->stimescaled += cputime_to_nsecs(scaled); diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index d55c829a5944..9da243d94cc3 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -168,8 +168,7 @@ union page_table_entry { unsigned long z : 1; /* Zero Bit */ unsigned long i : 1; /* Page-Invalid Bit */ unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long co : 1; /* Change-Recording Override */ - unsigned long : 8; + unsigned long : 9; }; }; @@ -262,7 +261,7 @@ struct aste { int ipte_lock_held(struct kvm_vcpu *vcpu) { - if (vcpu->arch.sie_block->eca & 1) { + if (vcpu->arch.sie_block->eca & ECA_SII) { int rc; read_lock(&vcpu->kvm->arch.sca_lock); @@ -361,7 +360,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) void ipte_lock(struct kvm_vcpu *vcpu) { - if (vcpu->arch.sie_block->eca & 1) + if (vcpu->arch.sie_block->eca & ECA_SII) ipte_lock_siif(vcpu); else ipte_lock_simple(vcpu); @@ -369,7 +368,7 @@ void ipte_lock(struct kvm_vcpu *vcpu) void ipte_unlock(struct kvm_vcpu *vcpu) { - if (vcpu->arch.sie_block->eca & 1) + if (vcpu->arch.sie_block->eca & ECA_SII) ipte_unlock_siif(vcpu); else ipte_unlock_simple(vcpu); @@ -745,8 +744,6 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, return PGM_PAGE_TRANSLATION; if (pte.z) return PGM_TRANSLATION_SPEC; - if (pte.co && !edat1) - return PGM_TRANSLATION_SPEC; dat_protection |= pte.p; raddr.pfra = pte.pfra; real_address: @@ -1182,7 +1179,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val); if (!rc && pte.i) rc = PGM_PAGE_TRANSLATION; - if (!rc && (pte.z || (pte.co && sg->edat_level < 1))) + if (!rc && pte.z) rc = PGM_TRANSLATION_SPEC; shadow_page: pte.p |= dat_protection; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 59920f96ebc0..a4752bf6b526 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -35,6 +35,7 @@ static const intercept_handler_t instruction_handlers[256] = { [0xb6] = kvm_s390_handle_stctl, [0xb7] = kvm_s390_handle_lctl, [0xb9] = kvm_s390_handle_b9, + [0xe3] = kvm_s390_handle_e3, [0xe5] = kvm_s390_handle_e5, [0xeb] = kvm_s390_handle_eb, }; @@ -368,8 +369,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, vcpu->arch.sie_block->ipb); - if (vcpu->arch.sie_block->ipa == 0xb256 && - test_kvm_facility(vcpu->kvm, 74)) + if (vcpu->arch.sie_block->ipa == 0xb256) return handle_sthyi(vcpu); if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0) @@ -404,28 +404,31 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; switch (vcpu->arch.sie_block->icptcode) { - case 0x10: - case 0x18: + case ICPT_EXTREQ: + case ICPT_IOREQ: return handle_noop(vcpu); - case 0x04: + case ICPT_INST: rc = handle_instruction(vcpu); break; - case 0x08: + case ICPT_PROGI: return handle_prog(vcpu); - case 0x14: + case ICPT_EXTINT: return handle_external_interrupt(vcpu); - case 0x1c: + case ICPT_WAIT: return kvm_s390_handle_wait(vcpu); - case 0x20: + case ICPT_VALIDITY: return handle_validity(vcpu); - case 0x28: + case ICPT_STOP: return handle_stop(vcpu); - case 0x2c: + case ICPT_OPEREXC: rc = handle_operexc(vcpu); break; - case 0x38: + case ICPT_PARTEXEC: rc = handle_partial_execution(vcpu); break; + case ICPT_KSS: + rc = kvm_s390_skey_check_enable(vcpu); + break; default: return -EOPNOTSUPP; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 0f8f14199734..caf15c8a8948 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -410,6 +410,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, struct kvm_s390_mchk_info *mchk) { unsigned long ext_sa_addr; + unsigned long lc; freg_t fprs[NUM_FPRS]; union mci mci; int rc; @@ -418,12 +419,34 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, /* take care of lazy register loading */ save_fpu_regs(); save_access_regs(vcpu->run->s.regs.acrs); + if (MACHINE_HAS_GS && vcpu->arch.gs_enabled) + save_gs_cb(current->thread.gs_cb); /* Extended save area */ - rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr, - sizeof(unsigned long)); - /* Only bits 0-53 are used for address formation */ - ext_sa_addr &= ~0x3ffUL; + rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr, + sizeof(unsigned long)); + /* Only bits 0 through 63-LC are used for address formation */ + lc = ext_sa_addr & MCESA_LC_MASK; + if (test_kvm_facility(vcpu->kvm, 133)) { + switch (lc) { + case 0: + case 10: + ext_sa_addr &= ~0x3ffUL; + break; + case 11: + ext_sa_addr &= ~0x7ffUL; + break; + case 12: + ext_sa_addr &= ~0xfffUL; + break; + default: + ext_sa_addr = 0; + break; + } + } else { + ext_sa_addr &= ~0x3ffUL; + } + if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) { if (write_guest_abs(vcpu, ext_sa_addr, vcpu->run->s.regs.vrs, 512)) @@ -431,6 +454,14 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, } else { mci.vr = 0; } + if (!rc && mci.gs && ext_sa_addr && test_kvm_facility(vcpu->kvm, 133) + && (lc == 11 || lc == 12)) { + if (write_guest_abs(vcpu, ext_sa_addr + 1024, + &vcpu->run->s.regs.gscb, 32)) + mci.gs = 0; + } else { + mci.gs = 0; + } /* General interruption information */ rc |= put_guest_lc(vcpu, 1, (u8 __user *) __LC_AR_MODE_ID); @@ -1968,6 +1999,8 @@ static int register_io_adapter(struct kvm_device *dev, adapter->maskable = adapter_info.maskable; adapter->masked = false; adapter->swap = adapter_info.swap; + adapter->suppressible = (adapter_info.flags) & + KVM_S390_ADAPTER_SUPPRESSIBLE; dev->kvm->arch.adapters[adapter->id] = adapter; return 0; @@ -2121,6 +2154,87 @@ static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr) return 0; } +static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + struct kvm_s390_ais_req req; + int ret = 0; + + if (!fi->ais_enabled) + return -ENOTSUPP; + + if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) + return -EFAULT; + + if (req.isc > MAX_ISC) + return -EINVAL; + + trace_kvm_s390_modify_ais_mode(req.isc, + (fi->simm & AIS_MODE_MASK(req.isc)) ? + (fi->nimm & AIS_MODE_MASK(req.isc)) ? + 2 : KVM_S390_AIS_MODE_SINGLE : + KVM_S390_AIS_MODE_ALL, req.mode); + + mutex_lock(&fi->ais_lock); + switch (req.mode) { + case KVM_S390_AIS_MODE_ALL: + fi->simm &= ~AIS_MODE_MASK(req.isc); + fi->nimm &= ~AIS_MODE_MASK(req.isc); + break; + case KVM_S390_AIS_MODE_SINGLE: + fi->simm |= AIS_MODE_MASK(req.isc); + fi->nimm &= ~AIS_MODE_MASK(req.isc); + break; + default: + ret = -EINVAL; + } + mutex_unlock(&fi->ais_lock); + + return ret; +} + +static int kvm_s390_inject_airq(struct kvm *kvm, + struct s390_io_adapter *adapter) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_IO(1, 0, 0, 0), + .parm = 0, + .parm64 = (adapter->isc << 27) | 0x80000000, + }; + int ret = 0; + + if (!fi->ais_enabled || !adapter->suppressible) + return kvm_s390_inject_vm(kvm, &s390int); + + mutex_lock(&fi->ais_lock); + if (fi->nimm & AIS_MODE_MASK(adapter->isc)) { + trace_kvm_s390_airq_suppressed(adapter->id, adapter->isc); + goto out; + } + + ret = kvm_s390_inject_vm(kvm, &s390int); + if (!ret && (fi->simm & AIS_MODE_MASK(adapter->isc))) { + fi->nimm |= AIS_MODE_MASK(adapter->isc); + trace_kvm_s390_modify_ais_mode(adapter->isc, + KVM_S390_AIS_MODE_SINGLE, 2); + } +out: + mutex_unlock(&fi->ais_lock); + return ret; +} + +static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr) +{ + unsigned int id = attr->attr; + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + + if (!adapter) + return -EINVAL; + + return kvm_s390_inject_airq(kvm, adapter); +} + static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r = 0; @@ -2157,6 +2271,12 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) case KVM_DEV_FLIC_CLEAR_IO_IRQ: r = clear_io_irq(dev->kvm, attr); break; + case KVM_DEV_FLIC_AISM: + r = modify_ais_mode(dev->kvm, attr); + break; + case KVM_DEV_FLIC_AIRQ_INJECT: + r = flic_inject_airq(dev->kvm, attr); + break; default: r = -EINVAL; } @@ -2176,6 +2296,8 @@ static int flic_has_attr(struct kvm_device *dev, case KVM_DEV_FLIC_ADAPTER_REGISTER: case KVM_DEV_FLIC_ADAPTER_MODIFY: case KVM_DEV_FLIC_CLEAR_IO_IRQ: + case KVM_DEV_FLIC_AISM: + case KVM_DEV_FLIC_AIRQ_INJECT: return 0; } return -ENXIO; @@ -2286,12 +2408,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, ret = adapter_indicators_set(kvm, adapter, &e->adapter); up_read(&adapter->maps_lock); if ((ret > 0) && !adapter->masked) { - struct kvm_s390_interrupt s390int = { - .type = KVM_S390_INT_IO(1, 0, 0, 0), - .parm = 0, - .parm64 = (adapter->isc << 27) | 0x80000000, - }; - ret = kvm_s390_inject_vm(kvm, &s390int); + ret = kvm_s390_inject_airq(kvm, adapter); if (ret == 0) ret = 1; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fd6cd05bb6a7..689ac48361c6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -273,9 +273,13 @@ static void kvm_s390_cpu_feat_init(void) kvm_s390_available_subfunc.pcc); } if (test_facility(57)) /* MSA5 */ - __cpacf_query(CPACF_PPNO, (cpacf_mask_t *) + __cpacf_query(CPACF_PRNO, (cpacf_mask_t *) kvm_s390_available_subfunc.ppno); + if (test_facility(146)) /* MSA8 */ + __cpacf_query(CPACF_KMA, (cpacf_mask_t *) + kvm_s390_available_subfunc.kma); + if (MACHINE_HAS_ESOP) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); /* @@ -300,6 +304,8 @@ static void kvm_s390_cpu_feat_init(void) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); if (sclp.has_ibs) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); + if (sclp.has_kss) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS); /* * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make * all skey handling functions read/set the skey from the PGSTE @@ -380,6 +386,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_SKEYS: case KVM_CAP_S390_IRQ_STATE: case KVM_CAP_S390_USER_INSTR0: + case KVM_CAP_S390_AIS: r = 1; break; case KVM_CAP_S390_MEM_OP: @@ -405,6 +412,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_RI: r = test_facility(64); break; + case KVM_CAP_S390_GS: + r = test_facility(133); + break; default: r = 0; } @@ -541,6 +551,34 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_AIS: + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) { + r = -EBUSY; + } else { + set_kvm_facility(kvm->arch.model.fac_mask, 72); + set_kvm_facility(kvm->arch.model.fac_list, 72); + kvm->arch.float_int.ais_enabled = 1; + r = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: AIS %s", + r ? "(not available)" : "(success)"); + break; + case KVM_CAP_S390_GS: + r = -EINVAL; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (test_facility(133)) { + set_kvm_facility(kvm->arch.model.fac_mask, 133); + set_kvm_facility(kvm->arch.model.fac_list, 133); + r = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", + r ? "(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -1166,10 +1204,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) return -EINVAL; - keys = kmalloc_array(args->count, sizeof(uint8_t), - GFP_KERNEL | __GFP_NOWARN); - if (!keys) - keys = vmalloc(sizeof(uint8_t) * args->count); + keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); if (!keys) return -ENOMEM; @@ -1211,10 +1246,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) return -EINVAL; - keys = kmalloc_array(args->count, sizeof(uint8_t), - GFP_KERNEL | __GFP_NOWARN); - if (!keys) - keys = vmalloc(sizeof(uint8_t) * args->count); + keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); if (!keys) return -ENOMEM; @@ -1498,6 +1530,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_s390_crypto_init(kvm); + mutex_init(&kvm->arch.float_int.ais_lock); + kvm->arch.float_int.simm = 0; + kvm->arch.float_int.nimm = 0; + kvm->arch.float_int.ais_enabled = 0; spin_lock_init(&kvm->arch.float_int.lock); for (i = 0; i < FIRQ_LIST_COUNT; i++) INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); @@ -1512,9 +1548,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; } else { if (sclp.hamax == U64_MAX) - kvm->arch.mem_limit = TASK_MAX_SIZE; + kvm->arch.mem_limit = TASK_SIZE_MAX; else - kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE, + kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, sclp.hamax + 1); kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); if (!kvm->arch.gmap) @@ -1646,7 +1682,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu) sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; - vcpu->arch.sie_block->ecb2 |= 0x04U; + vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; @@ -1700,7 +1736,7 @@ static int sca_switch_to_extended(struct kvm *kvm) kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { vcpu->arch.sie_block->scaoh = scaoh; vcpu->arch.sie_block->scaol = scaol; - vcpu->arch.sie_block->ecb2 |= 0x04U; + vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; } kvm->arch.sca = new_sca; kvm->arch.use_esca = 1; @@ -1749,6 +1785,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; + if (test_kvm_facility(vcpu->kvm, 133)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; /* fprs can be synchronized via vrs, even if the guest has no vx. With * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. */ @@ -1939,8 +1977,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) if (!vcpu->arch.sie_block->cbrlo) return -ENOMEM; - vcpu->arch.sie_block->ecb2 |= 0x80; - vcpu->arch.sie_block->ecb2 &= ~0x08; + vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; + vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI; return 0; } @@ -1970,31 +2008,37 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ if (MACHINE_HAS_ESOP) - vcpu->arch.sie_block->ecb |= 0x02; + vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; if (test_kvm_facility(vcpu->kvm, 9)) - vcpu->arch.sie_block->ecb |= 0x04; + vcpu->arch.sie_block->ecb |= ECB_SRSI; if (test_kvm_facility(vcpu->kvm, 73)) - vcpu->arch.sie_block->ecb |= 0x10; + vcpu->arch.sie_block->ecb |= ECB_TE; if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) - vcpu->arch.sie_block->ecb2 |= 0x08; + vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; if (test_kvm_facility(vcpu->kvm, 130)) - vcpu->arch.sie_block->ecb2 |= 0x20; - vcpu->arch.sie_block->eca = 0x1002000U; + vcpu->arch.sie_block->ecb2 |= ECB2_IEP; + vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; if (sclp.has_cei) - vcpu->arch.sie_block->eca |= 0x80000000U; + vcpu->arch.sie_block->eca |= ECA_CEI; if (sclp.has_ib) - vcpu->arch.sie_block->eca |= 0x40000000U; + vcpu->arch.sie_block->eca |= ECA_IB; if (sclp.has_siif) - vcpu->arch.sie_block->eca |= 1; + vcpu->arch.sie_block->eca |= ECA_SII; if (sclp.has_sigpif) - vcpu->arch.sie_block->eca |= 0x10000000U; + vcpu->arch.sie_block->eca |= ECA_SIGPI; if (test_kvm_facility(vcpu->kvm, 129)) { - vcpu->arch.sie_block->eca |= 0x00020000; - vcpu->arch.sie_block->ecd |= 0x20000000; + vcpu->arch.sie_block->eca |= ECA_VX; + vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; } + vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) + | SDNXC; vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; - vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; + + if (sclp.has_kss) + atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags); + else + vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; if (vcpu->kvm->arch.use_cmma) { rc = kvm_s390_vcpu_setup_cmma(vcpu); @@ -2446,7 +2490,7 @@ retry: } /* nothing to do, just clear the request */ - clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); return 0; } @@ -2719,6 +2763,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct runtime_instr_cb *riccb; + struct gs_cb *gscb; + + riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; + gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) @@ -2747,12 +2796,24 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) * we should enable RI here instead of doing the lazy enablement. */ if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && - test_kvm_facility(vcpu->kvm, 64)) { - struct runtime_instr_cb *riccb = - (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; - - if (riccb->valid) - vcpu->arch.sie_block->ecb3 |= 0x01; + test_kvm_facility(vcpu->kvm, 64) && + riccb->valid && + !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { + VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); + vcpu->arch.sie_block->ecb3 |= ECB3_RI; + } + /* + * If userspace sets the gscb (e.g. after migration) to non-zero, + * we should enable GS here instead of doing the lazy enablement. + */ + if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && + test_kvm_facility(vcpu->kvm, 133) && + gscb->gssm && + !vcpu->arch.gs_enabled) { + VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); + vcpu->arch.sie_block->ecb |= ECB_GS; + vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; + vcpu->arch.gs_enabled = 1; } save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); @@ -2768,6 +2829,20 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ current->thread.fpu.fpc = 0; + if (MACHINE_HAS_GS) { + preempt_disable(); + __ctl_set_bit(2, 4); + if (current->thread.gs_cb) { + vcpu->arch.host_gscb = current->thread.gs_cb; + save_gs_cb(vcpu->arch.host_gscb); + } + if (vcpu->arch.gs_enabled) { + current->thread.gs_cb = (struct gs_cb *) + &vcpu->run->s.regs.gscb; + restore_gs_cb(current->thread.gs_cb); + } + preempt_enable(); + } kvm_run->kvm_dirty_regs = 0; } @@ -2794,6 +2869,18 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* Restore will be done lazily at return */ current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; + if (MACHINE_HAS_GS) { + __ctl_set_bit(2, 4); + if (vcpu->arch.gs_enabled) + save_gs_cb(current->thread.gs_cb); + preempt_disable(); + current->thread.gs_cb = vcpu->arch.host_gscb; + restore_gs_cb(vcpu->arch.host_gscb); + preempt_enable(); + if (!vcpu->arch.host_gscb) + __ctl_clear_bit(2, 4); + vcpu->arch.host_gscb = NULL; + } } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index af9fa91a0c91..55f5c8457d6d 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -25,7 +25,7 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); /* Transactional Memory Execution related macros */ -#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) +#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) #define TDB_FORMAT1 1 #define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) @@ -246,6 +246,7 @@ static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu) int is_valid_psw(psw_t *psw); int kvm_s390_handle_aa(struct kvm_vcpu *vcpu); int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); +int kvm_s390_handle_e3(struct kvm_vcpu *vcpu); int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); int kvm_s390_handle_01(struct kvm_vcpu *vcpu); int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); @@ -253,6 +254,7 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu); int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); +int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu); /* implemented in vsie.c */ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 64b6a309f2c4..c03106c428cf 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -37,7 +37,8 @@ static int handle_ri(struct kvm_vcpu *vcpu) { if (test_kvm_facility(vcpu->kvm, 64)) { - vcpu->arch.sie_block->ecb3 |= 0x01; + VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (lazy)"); + vcpu->arch.sie_block->ecb3 |= ECB3_RI; kvm_s390_retry_instr(vcpu); return 0; } else @@ -52,6 +53,33 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } +static int handle_gs(struct kvm_vcpu *vcpu) +{ + if (test_kvm_facility(vcpu->kvm, 133)) { + VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)"); + preempt_disable(); + __ctl_set_bit(2, 4); + current->thread.gs_cb = (struct gs_cb *)&vcpu->run->s.regs.gscb; + restore_gs_cb(current->thread.gs_cb); + preempt_enable(); + vcpu->arch.sie_block->ecb |= ECB_GS; + vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; + vcpu->arch.gs_enabled = 1; + kvm_s390_retry_instr(vcpu); + return 0; + } else + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); +} + +int kvm_s390_handle_e3(struct kvm_vcpu *vcpu) +{ + int code = vcpu->arch.sie_block->ipb & 0xff; + + if (code == 0x49 || code == 0x4d) + return handle_gs(vcpu); + else + return -EOPNOTSUPP; +} /* Handle SCK (SET CLOCK) interception */ static int handle_set_clock(struct kvm_vcpu *vcpu) { @@ -170,18 +198,25 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) return 0; } -static int __skey_check_enable(struct kvm_vcpu *vcpu) +int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu) { int rc = 0; + struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; trace_kvm_s390_skey_related_inst(vcpu); - if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE))) + if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) && + !(atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS)) return rc; rc = s390_enable_skey(); VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc); - if (!rc) - vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); + if (!rc) { + if (atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS) + atomic_andnot(CPUSTAT_KSS, &sie_block->cpuflags); + else + sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | + ICTL_RRBE); + } return rc; } @@ -190,7 +225,7 @@ static int try_handle_skey(struct kvm_vcpu *vcpu) int rc; vcpu->stat.instruction_storage_key++; - rc = __skey_check_enable(vcpu); + rc = kvm_s390_skey_check_enable(vcpu); if (rc) return rc; if (sclp.has_skey) { @@ -759,6 +794,7 @@ static const intercept_handler_t b2_handlers[256] = { [0x3b] = handle_io_inst, [0x3c] = handle_io_inst, [0x50] = handle_ipte_interlock, + [0x56] = handle_sthyi, [0x5f] = handle_io_inst, [0x74] = handle_io_inst, [0x76] = handle_io_inst, @@ -887,7 +923,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) } if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { - int rc = __skey_check_enable(vcpu); + int rc = kvm_s390_skey_check_enable(vcpu); if (rc) return rc; diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 05c98bb853cf..926b5244263e 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -404,6 +404,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu) u64 code, addr, cc = 0; struct sthyi_sctns *sctns = NULL; + if (!test_kvm_facility(vcpu->kvm, 74)) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + /* * STHYI requires extensive locking in the higher hypervisors * and is very computational/memory expensive. Therefore we diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 396485bca191..78b7e847984a 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -280,6 +280,58 @@ TRACE_EVENT(kvm_s390_enable_disable_ibs, __entry->state ? "enabling" : "disabling", __entry->id) ); +/* + * Trace point for modifying ais mode for a given isc. + */ +TRACE_EVENT(kvm_s390_modify_ais_mode, + TP_PROTO(__u8 isc, __u16 from, __u16 to), + TP_ARGS(isc, from, to), + + TP_STRUCT__entry( + __field(__u8, isc) + __field(__u16, from) + __field(__u16, to) + ), + + TP_fast_assign( + __entry->isc = isc; + __entry->from = from; + __entry->to = to; + ), + + TP_printk("for isc %x, modifying interruption mode from %s to %s", + __entry->isc, + (__entry->from == KVM_S390_AIS_MODE_ALL) ? + "ALL-Interruptions Mode" : + (__entry->from == KVM_S390_AIS_MODE_SINGLE) ? + "Single-Interruption Mode" : "No-Interruptions Mode", + (__entry->to == KVM_S390_AIS_MODE_ALL) ? + "ALL-Interruptions Mode" : + (__entry->to == KVM_S390_AIS_MODE_SINGLE) ? + "Single-Interruption Mode" : "No-Interruptions Mode") + ); + +/* + * Trace point for suppressed adapter I/O interrupt. + */ +TRACE_EVENT(kvm_s390_airq_suppressed, + TP_PROTO(__u32 id, __u8 isc), + TP_ARGS(id, isc), + + TP_STRUCT__entry( + __field(__u32, id) + __field(__u8, isc) + ), + + TP_fast_assign( + __entry->id = id; + __entry->isc = isc; + ), + + TP_printk("adapter I/O interrupt suppressed (id:%x isc:%x)", + __entry->id, __entry->isc) + ); + #endif /* _TRACE_KVMS390_H */ diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 5491be39776b..4719ecb9ab42 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -117,6 +117,8 @@ static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) newflags |= cpuflags & CPUSTAT_SM; if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS)) newflags |= cpuflags & CPUSTAT_IBS; + if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS)) + newflags |= cpuflags & CPUSTAT_KSS; atomic_set(&scb_s->cpuflags, newflags); return 0; @@ -249,7 +251,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; - bool had_tx = scb_s->ecb & 0x10U; + bool had_tx = scb_s->ecb & ECB_TE; unsigned long new_mso = 0; int rc; @@ -289,7 +291,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) * bits. Therefore we cannot provide interpretation and would later * have to provide own emulation handlers. */ - scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; + if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS)) + scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; + scb_s->icpua = scb_o->icpua; if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM)) @@ -307,34 +311,39 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->ihcpu = scb_o->ihcpu; /* MVPG and Protection Exception Interpretation are always available */ - scb_s->eca |= scb_o->eca & 0x01002000U; + scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI); /* Host-protection-interruption introduced with ESOP */ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP)) - scb_s->ecb |= scb_o->ecb & 0x02U; + scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT; /* transactional execution */ if (test_kvm_facility(vcpu->kvm, 73)) { /* remap the prefix is tx is toggled on */ - if ((scb_o->ecb & 0x10U) && !had_tx) + if ((scb_o->ecb & ECB_TE) && !had_tx) prefix_unmapped(vsie_page); - scb_s->ecb |= scb_o->ecb & 0x10U; + scb_s->ecb |= scb_o->ecb & ECB_TE; } /* SIMD */ if (test_kvm_facility(vcpu->kvm, 129)) { - scb_s->eca |= scb_o->eca & 0x00020000U; - scb_s->ecd |= scb_o->ecd & 0x20000000U; + scb_s->eca |= scb_o->eca & ECA_VX; + scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT; } /* Run-time-Instrumentation */ if (test_kvm_facility(vcpu->kvm, 64)) - scb_s->ecb3 |= scb_o->ecb3 & 0x01U; + scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI; /* Instruction Execution Prevention */ if (test_kvm_facility(vcpu->kvm, 130)) - scb_s->ecb2 |= scb_o->ecb2 & 0x20U; + scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP; + /* Guarded Storage */ + if (test_kvm_facility(vcpu->kvm, 133)) { + scb_s->ecb |= scb_o->ecb & ECB_GS; + scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT; + } if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF)) - scb_s->eca |= scb_o->eca & 0x00000001U; + scb_s->eca |= scb_o->eca & ECA_SII; if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB)) - scb_s->eca |= scb_o->eca & 0x40000000U; + scb_s->eca |= scb_o->eca & ECA_IB; if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) - scb_s->eca |= scb_o->eca & 0x80000000U; + scb_s->eca |= scb_o->eca & ECA_CEI; prepare_ibc(vcpu, vsie_page); rc = shadow_crycb(vcpu, vsie_page); @@ -406,7 +415,7 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) prefix += scb_s->mso; rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); - if (!rc && (scb_s->ecb & 0x10U)) + if (!rc && (scb_s->ecb & ECB_TE)) rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix + PAGE_SIZE); /* @@ -496,6 +505,13 @@ static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) unpin_guest_page(vcpu->kvm, gpa, hpa); scb_s->riccbd = 0; } + + hpa = scb_s->sdnxo; + if (hpa) { + gpa = scb_o->sdnxo; + unpin_guest_page(vcpu->kvm, gpa, hpa); + scb_s->sdnxo = 0; + } } /* @@ -543,7 +559,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } gpa = scb_o->itdba & ~0xffUL; - if (gpa && (scb_s->ecb & 0x10U)) { + if (gpa && (scb_s->ecb & ECB_TE)) { if (!(gpa & ~0x1fffU)) { rc = set_validity_icpt(scb_s, 0x0080U); goto unpin; @@ -558,8 +574,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } gpa = scb_o->gvrd & ~0x1ffUL; - if (gpa && (scb_s->eca & 0x00020000U) && - !(scb_s->ecd & 0x20000000U)) { + if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { if (!(gpa & ~0x1fffUL)) { rc = set_validity_icpt(scb_s, 0x1310U); goto unpin; @@ -577,7 +592,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } gpa = scb_o->riccbd & ~0x3fUL; - if (gpa && (scb_s->ecb3 & 0x01U)) { + if (gpa && (scb_s->ecb3 & ECB3_RI)) { if (!(gpa & ~0x1fffUL)) { rc = set_validity_icpt(scb_s, 0x0043U); goto unpin; @@ -591,6 +606,33 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) goto unpin; scb_s->riccbd = hpa; } + if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { + unsigned long sdnxc; + + gpa = scb_o->sdnxo & ~0xfUL; + sdnxc = scb_o->sdnxo & 0xfUL; + if (!gpa || !(gpa & ~0x1fffUL)) { + rc = set_validity_icpt(scb_s, 0x10b0U); + goto unpin; + } + if (sdnxc < 6 || sdnxc > 12) { + rc = set_validity_icpt(scb_s, 0x10b1U); + goto unpin; + } + if (gpa & ((1 << sdnxc) - 1)) { + rc = set_validity_icpt(scb_s, 0x10b2U); + goto unpin; + } + /* Due to alignment rules (checked above) this cannot + * cross page boundaries + */ + rc = pin_guest_page(vcpu->kvm, gpa, &hpa); + if (rc == -EINVAL) + rc = set_validity_icpt(scb_s, 0x10b0U); + if (rc) + goto unpin; + scb_s->sdnxo = hpa | sdnxc; + } return 0; unpin: unpin_blocks(vcpu, vsie_page); diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index ba427eb6f14c..ffb15bd4c593 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -17,7 +17,7 @@ int spin_retry = -1; static int __init spin_retry_init(void) { if (spin_retry < 0) - spin_retry = MACHINE_HAS_CAD ? 10 : 1000; + spin_retry = 1000; return 0; } early_initcall(spin_retry_init); @@ -32,23 +32,17 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); -static inline void _raw_compare_and_delay(unsigned int *lock, unsigned int old) -{ - asm(".insn rsy,0xeb0000000022,%0,0,%1" : : "d" (old), "Q" (*lock)); -} - void arch_spin_lock_wait(arch_spinlock_t *lp) { - unsigned int cpu = SPINLOCK_LOCKVAL; - unsigned int owner; - int count, first_diag; + int cpu = SPINLOCK_LOCKVAL; + int owner, count, first_diag; first_diag = 1; while (1) { owner = ACCESS_ONCE(lp->lock); /* Try to get the lock if it is free. */ if (!owner) { - if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) return; continue; } @@ -61,8 +55,6 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) /* Loop for a while on the lock value. */ count = spin_retry; do { - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&lp->lock, owner); owner = ACCESS_ONCE(lp->lock); } while (owner && count-- > 0); if (!owner) @@ -82,9 +74,8 @@ EXPORT_SYMBOL(arch_spin_lock_wait); void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) { - unsigned int cpu = SPINLOCK_LOCKVAL; - unsigned int owner; - int count, first_diag; + int cpu = SPINLOCK_LOCKVAL; + int owner, count, first_diag; local_irq_restore(flags); first_diag = 1; @@ -93,7 +84,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) /* Try to get the lock if it is free. */ if (!owner) { local_irq_disable(); - if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) return; local_irq_restore(flags); continue; @@ -107,8 +98,6 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) /* Loop for a while on the lock value. */ count = spin_retry; do { - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&lp->lock, owner); owner = ACCESS_ONCE(lp->lock); } while (owner && count-- > 0); if (!owner) @@ -128,18 +117,16 @@ EXPORT_SYMBOL(arch_spin_lock_wait_flags); int arch_spin_trylock_retry(arch_spinlock_t *lp) { - unsigned int cpu = SPINLOCK_LOCKVAL; - unsigned int owner; - int count; + int cpu = SPINLOCK_LOCKVAL; + int owner, count; for (count = spin_retry; count > 0; count--) { owner = READ_ONCE(lp->lock); /* Try to get the lock if it is free. */ if (!owner) { - if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) return 1; - } else if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&lp->lock, owner); + } } return 0; } @@ -147,8 +134,8 @@ EXPORT_SYMBOL(arch_spin_trylock_retry); void _raw_read_lock_wait(arch_rwlock_t *rw) { - unsigned int owner, old; int count = spin_retry; + int owner, old; #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES __RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD); @@ -162,12 +149,9 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) } old = ACCESS_ONCE(rw->lock); owner = ACCESS_ONCE(rw->owner); - if ((int) old < 0) { - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&rw->lock, old); + if (old < 0) continue; - } - if (_raw_compare_and_swap(&rw->lock, old, old + 1)) + if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1)) return; } } @@ -175,17 +159,14 @@ EXPORT_SYMBOL(_raw_read_lock_wait); int _raw_read_trylock_retry(arch_rwlock_t *rw) { - unsigned int old; int count = spin_retry; + int old; while (count-- > 0) { old = ACCESS_ONCE(rw->lock); - if ((int) old < 0) { - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&rw->lock, old); + if (old < 0) continue; - } - if (_raw_compare_and_swap(&rw->lock, old, old + 1)) + if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1)) return 1; } return 0; @@ -194,10 +175,10 @@ EXPORT_SYMBOL(_raw_read_trylock_retry); #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES -void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) +void _raw_write_lock_wait(arch_rwlock_t *rw, int prev) { - unsigned int owner, old; int count = spin_retry; + int owner, old; owner = 0; while (1) { @@ -209,14 +190,12 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) old = ACCESS_ONCE(rw->lock); owner = ACCESS_ONCE(rw->owner); smp_mb(); - if ((int) old >= 0) { + if (old >= 0) { prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); old = prev; } - if ((old & 0x7fffffff) == 0 && (int) prev >= 0) + if ((old & 0x7fffffff) == 0 && prev >= 0) break; - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&rw->lock, old); } } EXPORT_SYMBOL(_raw_write_lock_wait); @@ -225,8 +204,8 @@ EXPORT_SYMBOL(_raw_write_lock_wait); void _raw_write_lock_wait(arch_rwlock_t *rw) { - unsigned int owner, old, prev; int count = spin_retry; + int owner, old, prev; prev = 0x80000000; owner = 0; @@ -238,15 +217,13 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) } old = ACCESS_ONCE(rw->lock); owner = ACCESS_ONCE(rw->owner); - if ((int) old >= 0 && - _raw_compare_and_swap(&rw->lock, old, old | 0x80000000)) + if (old >= 0 && + __atomic_cmpxchg_bool(&rw->lock, old, old | 0x80000000)) prev = old; else smp_mb(); - if ((old & 0x7fffffff) == 0 && (int) prev >= 0) + if ((old & 0x7fffffff) == 0 && prev >= 0) break; - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&rw->lock, old); } } EXPORT_SYMBOL(_raw_write_lock_wait); @@ -255,24 +232,21 @@ EXPORT_SYMBOL(_raw_write_lock_wait); int _raw_write_trylock_retry(arch_rwlock_t *rw) { - unsigned int old; int count = spin_retry; + int old; while (count-- > 0) { old = ACCESS_ONCE(rw->lock); - if (old) { - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&rw->lock, old); + if (old) continue; - } - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) + if (__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000)) return 1; } return 0; } EXPORT_SYMBOL(_raw_write_trylock_retry); -void arch_lock_relax(unsigned int cpu) +void arch_lock_relax(int cpu) { if (!cpu) return; diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index f481fcde067b..1e5bb2b86c42 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -26,7 +26,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr tmp1 = -4096UL; asm volatile( "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" - "9: jz 7f\n" + "6: jz 4f\n" "1: algr %0,%3\n" " slgr %1,%3\n" " slgr %2,%3\n" @@ -35,23 +35,13 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? */ - " jnh 4f\n" + " jnh 5f\n" "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n" - "10:slgr %0,%4\n" - " algr %2,%4\n" - "4: lghi %4,-1\n" - " algr %4,%0\n" /* copy remaining size, subtract 1 */ - " bras %3,6f\n" /* memset loop */ - " xc 0(1,%2),0(%2)\n" - "5: xc 0(256,%2),0(%2)\n" - " la %2,256(%2)\n" - "6: aghi %4,-256\n" - " jnm 5b\n" - " ex %4,0(%3)\n" - " j 8f\n" - "7: slgr %0,%0\n" - "8:\n" - EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b) + "7: slgr %0,%4\n" + " j 5f\n" + "4: slgr %0,%0\n" + "5:\n" + EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : "d" (reg0) : "cc", "memory"); return size; @@ -67,49 +57,38 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, asm volatile( " sacf 0\n" "0: mvcp 0(%0,%2),0(%1),%3\n" - "10:jz 8f\n" + "7: jz 5f\n" "1: algr %0,%3\n" " la %1,256(%1)\n" " la %2,256(%2)\n" "2: mvcp 0(%0,%2),0(%1),%3\n" - "11:jnz 1b\n" - " j 8f\n" + "8: jnz 1b\n" + " j 5f\n" "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ " lghi %3,-4096\n" " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? */ - " jnh 5f\n" + " jnh 6f\n" "4: mvcp 0(%4,%2),0(%1),%3\n" - "12:slgr %0,%4\n" - " algr %2,%4\n" - "5: lghi %4,-1\n" - " algr %4,%0\n" /* copy remaining size, subtract 1 */ - " bras %3,7f\n" /* memset loop */ - " xc 0(1,%2),0(%2)\n" - "6: xc 0(256,%2),0(%2)\n" - " la %2,256(%2)\n" - "7: aghi %4,-256\n" - " jnm 6b\n" - " ex %4,0(%3)\n" - " j 9f\n" - "8: slgr %0,%0\n" - "9: sacf 768\n" - EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b) - EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b) + "9: slgr %0,%4\n" + " j 6f\n" + "5: slgr %0,%0\n" + "6: sacf 768\n" + EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) + EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : : "cc", "memory"); return size; } -unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) +unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - check_object_size(to, n, false); if (static_branch_likely(&have_mvcos)) return copy_from_user_mvcos(to, from, n); return copy_from_user_mvcp(to, from, n); } -EXPORT_SYMBOL(__copy_from_user); +EXPORT_SYMBOL(raw_copy_from_user); static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, unsigned long size) @@ -176,14 +155,13 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, return size; } -unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) +unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - check_object_size(from, n, true); if (static_branch_likely(&have_mvcos)) return copy_to_user_mvcos(to, from, n); return copy_to_user_mvcs(to, from, n); } -EXPORT_SYMBOL(__copy_to_user); +EXPORT_SYMBOL(raw_copy_to_user); static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, unsigned long size) @@ -240,13 +218,13 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user return size; } -unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n) +unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { if (static_branch_likely(&have_mvcos)) return copy_in_user_mvcos(to, from, n); return copy_in_user_mvc(to, from, n); } -EXPORT_SYMBOL(__copy_in_user); +EXPORT_SYMBOL(raw_copy_in_user); static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index a07b1ec1391d..7f6db1e6c048 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -431,7 +431,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; if (len == 0 || from + len < from || to + len < to || - from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) + from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end) return -EINVAL; flush = 0; @@ -2004,20 +2004,12 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page); * Called with sg->parent->shadow_lock. */ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, - unsigned long offset, pte_t *pte) + unsigned long gaddr, pte_t *pte) { struct gmap_rmap *rmap, *rnext, *head; - unsigned long gaddr, start, end, bits, raddr; - unsigned long *table; + unsigned long start, end, bits, raddr; BUG_ON(!gmap_is_shadow(sg)); - spin_lock(&sg->parent->guest_table_lock); - table = radix_tree_lookup(&sg->parent->host_to_guest, - vmaddr >> PMD_SHIFT); - gaddr = table ? __gmap_segment_gaddr(table) + offset : 0; - spin_unlock(&sg->parent->guest_table_lock); - if (!table) - return; spin_lock(&sg->guest_table_lock); if (sg->removed) { @@ -2076,7 +2068,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte, unsigned long bits) { - unsigned long offset, gaddr; + unsigned long offset, gaddr = 0; unsigned long *table; struct gmap *gmap, *sg, *next; @@ -2084,22 +2076,23 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, offset = offset * (4096 / sizeof(pte_t)); rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { - if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { - spin_lock(&gmap->shadow_lock); - list_for_each_entry_safe(sg, next, - &gmap->children, list) - gmap_shadow_notify(sg, vmaddr, offset, pte); - spin_unlock(&gmap->shadow_lock); - } - if (!(bits & PGSTE_IN_BIT)) - continue; spin_lock(&gmap->guest_table_lock); table = radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); if (table) gaddr = __gmap_segment_gaddr(table) + offset; spin_unlock(&gmap->guest_table_lock); - if (table) + if (!table) + continue; + + if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { + spin_lock(&gmap->shadow_lock); + list_for_each_entry_safe(sg, next, + &gmap->children, list) + gmap_shadow_notify(sg, vmaddr, gaddr, pte); + spin_unlock(&gmap->shadow_lock); + } + if (bits & PGSTE_IN_BIT) gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1); } rcu_read_unlock(); diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 18d4107e10ee..b7b779c40a5b 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -211,7 +211,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, addr = start; len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; - if ((end <= start) || (end > TASK_SIZE)) + if ((end <= start) || (end > mm->context.asce_limit)) return 0; /* * local_irq_save() doesn't prevent pagetable teardown, but does diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ee5066718b21..ee6a1d3d4983 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -39,6 +39,7 @@ #include <asm/sections.h> #include <asm/ctl_reg.h> #include <asm/sclp.h> +#include <asm/set_memory.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir); diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 50618614881f..b017daed6887 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -89,19 +89,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct vm_unmapped_area_info info; + int rc; if (len > TASK_SIZE - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) - return addr; + goto check_asce_limit; if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vma->vm_start)) - return addr; + goto check_asce_limit; } info.flags = 0; @@ -113,7 +114,18 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, else info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; - return vm_unmapped_area(&info); + addr = vm_unmapped_area(&info); + if (addr & ~PAGE_MASK) + return addr; + +check_asce_limit: + if (addr + len > current->mm->context.asce_limit) { + rc = crst_table_upgrade(mm); + if (rc) + return (unsigned long) rc; + } + + return addr; } unsigned long @@ -125,13 +137,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, struct mm_struct *mm = current->mm; unsigned long addr = addr0; struct vm_unmapped_area_info info; + int rc; /* requested length too big for entire address space */ if (len > TASK_SIZE - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) - return addr; + goto check_asce_limit; /* requesting a specific address */ if (addr) { @@ -139,7 +152,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vma->vm_start)) - return addr; + goto check_asce_limit; } info.flags = VM_UNMAPPED_AREA_TOPDOWN; @@ -165,65 +178,20 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, info.low_limit = TASK_UNMAPPED_BASE; info.high_limit = TASK_SIZE; addr = vm_unmapped_area(&info); + if (addr & ~PAGE_MASK) + return addr; } - return addr; -} - -int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) -{ - if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE) - return 0; - if (!(flags & MAP_FIXED)) - addr = 0; - if ((addr + len) >= TASK_SIZE) - return crst_table_upgrade(current->mm); - return 0; -} - -static unsigned long -s390_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - struct mm_struct *mm = current->mm; - unsigned long area; - int rc; - - area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); - if (!(area & ~PAGE_MASK)) - return area; - if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) { - /* Upgrade the page table to 4 levels and retry. */ +check_asce_limit: + if (addr + len > current->mm->context.asce_limit) { rc = crst_table_upgrade(mm); if (rc) return (unsigned long) rc; - area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); } - return area; -} - -static unsigned long -s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, - const unsigned long len, const unsigned long pgoff, - const unsigned long flags) -{ - struct mm_struct *mm = current->mm; - unsigned long area; - int rc; - area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags); - if (!(area & ~PAGE_MASK)) - return area; - if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) { - /* Upgrade the page table to 4 levels and retry. */ - rc = crst_table_upgrade(mm); - if (rc) - return (unsigned long) rc; - area = arch_get_unmapped_area_topdown(filp, addr, len, - pgoff, flags); - } - return area; + return addr; } + /* * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: @@ -241,9 +209,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ if (mmap_is_legacy()) { mm->mmap_base = mmap_base_legacy(random_factor); - mm->get_unmapped_area = s390_get_unmapped_area; + mm->get_unmapped_area = arch_get_unmapped_area; } else { mm->mmap_base = mmap_base(random_factor); - mm->get_unmapped_area = s390_get_unmapped_area_topdown; + mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 3330ea124eec..69a7b01ae746 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -13,8 +13,7 @@ #include <linux/gfp.h> #include <linux/init.h> -#define ESSA_SET_STABLE 1 -#define ESSA_SET_UNUSED 2 +#include <asm/page-states.h> static int cmma_flag = 1; diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index fc5dc33bb141..49e721f3645e 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -8,6 +8,7 @@ #include <asm/facility.h> #include <asm/pgtable.h> #include <asm/page.h> +#include <asm/set_memory.h> static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) { @@ -94,7 +95,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, new = pte_wrprotect(new); else if (flags & SET_MEMORY_RW) new = pte_mkwrite(pte_mkdirty(new)); - if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX) + if (flags & SET_MEMORY_NX) pte_val(new) |= _PAGE_NOEXEC; else if (flags & SET_MEMORY_X) pte_val(new) &= ~_PAGE_NOEXEC; @@ -144,7 +145,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, new = pmd_wrprotect(new); else if (flags & SET_MEMORY_RW) new = pmd_mkwrite(pmd_mkdirty(new)); - if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX) + if (flags & SET_MEMORY_NX) pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC; else if (flags & SET_MEMORY_X) pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC; @@ -221,7 +222,7 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, new = pud_wrprotect(new); else if (flags & SET_MEMORY_RW) new = pud_mkwrite(pud_mkdirty(new)); - if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX) + if (flags & SET_MEMORY_NX) pud_val(new) |= _REGION_ENTRY_NOEXEC; else if (flags & SET_MEMORY_X) pud_val(new) &= ~_REGION_ENTRY_NOEXEC; @@ -288,6 +289,10 @@ static int change_page_attr(unsigned long addr, unsigned long end, int __set_memory(unsigned long addr, int numpages, unsigned long flags) { + if (!MACHINE_HAS_NX) + flags &= ~(SET_MEMORY_NX | SET_MEMORY_X); + if (!flags) + return 0; addr &= PAGE_MASK; return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags); } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 995f78532cc2..f502cbe657af 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -95,7 +95,6 @@ int crst_table_upgrade(struct mm_struct *mm) mm->context.asce_limit = 1UL << 53; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION2; - mm->task_size = mm->context.asce_limit; spin_unlock_bh(&mm->page_table_lock); on_each_cpu(__crst_table_upgrade, mm, 0); @@ -119,7 +118,6 @@ void crst_table_downgrade(struct mm_struct *mm) mm->context.asce_limit = 1UL << 31; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; - mm->task_size = mm->context.asce_limit; crst_table_free(mm, (unsigned long *) pgd); if (current->active_mm == mm) @@ -144,7 +142,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm) struct page *page; unsigned long *table; - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + page = alloc_page(GFP_KERNEL); if (page) { table = (unsigned long *) page_to_phys(page); clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b48dc5f1900b..947b66a5cdba 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -23,6 +23,7 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> +#include <asm/page-states.h> static inline pte_t ptep_flush_direct(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -608,12 +609,29 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; pgste_t pgste; pte_t *ptep; pte_t pte; bool dirty; - ptep = get_locked_pte(mm, addr, &ptl); + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return false; + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return false; + /* We can't run guests backed by huge pages, but userspace can + * still set them up and then try to migrate them without any + * migration support. + */ + if (pmd_large(*pmd)) + return true; + + ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (unlikely(!ptep)) return false; @@ -770,4 +788,156 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, return 0; } EXPORT_SYMBOL(get_guest_storage_key); + +/** + * pgste_perform_essa - perform ESSA actions on the PGSTE. + * @mm: the memory context. It must have PGSTEs, no check is performed here! + * @hva: the host virtual address of the page whose PGSTE is to be processed + * @orc: the specific action to perform, see the ESSA_SET_* macros. + * @oldpte: the PTE will be saved there if the pointer is not NULL. + * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL. + * + * Return: 1 if the page is to be added to the CBRL, otherwise 0, + * or < 0 in case of error. -EINVAL is returned for invalid values + * of orc, -EFAULT for invalid addresses. + */ +int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, + unsigned long *oldpte, unsigned long *oldpgste) +{ + unsigned long pgstev; + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep; + int res = 0; + + WARN_ON_ONCE(orc > ESSA_MAX); + if (unlikely(orc > ESSA_MAX)) + return -EINVAL; + ptep = get_locked_pte(mm, hva, &ptl); + if (unlikely(!ptep)) + return -EFAULT; + pgste = pgste_get_lock(ptep); + pgstev = pgste_val(pgste); + if (oldpte) + *oldpte = pte_val(*ptep); + if (oldpgste) + *oldpgste = pgstev; + + switch (orc) { + case ESSA_GET_STATE: + break; + case ESSA_SET_STABLE: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_STABLE; + break; + case ESSA_SET_UNUSED: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_UNUSED; + if (pte_val(*ptep) & _PAGE_INVALID) + res = 1; + break; + case ESSA_SET_VOLATILE: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_VOLATILE; + if (pte_val(*ptep) & _PAGE_INVALID) + res = 1; + break; + case ESSA_SET_POT_VOLATILE: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE; + break; + } + if (pgstev & _PGSTE_GPS_ZERO) { + pgstev |= _PGSTE_GPS_USAGE_VOLATILE; + break; + } + if (!(pgstev & PGSTE_GC_BIT)) { + pgstev |= _PGSTE_GPS_USAGE_VOLATILE; + res = 1; + break; + } + break; + case ESSA_SET_STABLE_RESIDENT: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_STABLE; + /* + * Since the resident state can go away any time after this + * call, we will not make this page resident. We can revisit + * this decision if a guest will ever start using this. + */ + break; + case ESSA_SET_STABLE_IF_RESIDENT: + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_STABLE; + } + break; + default: + /* we should never get here! */ + break; + } + /* If we are discarding a page, set it to logical zero */ + if (res) + pgstev |= _PGSTE_GPS_ZERO; + + pgste_val(pgste) = pgstev; + pgste_set_unlock(ptep, pgste); + pte_unmap_unlock(ptep, ptl); + return res; +} +EXPORT_SYMBOL(pgste_perform_essa); + +/** + * set_pgste_bits - set specific PGSTE bits. + * @mm: the memory context. It must have PGSTEs, no check is performed here! + * @hva: the host virtual address of the page whose PGSTE is to be processed + * @bits: a bitmask representing the bits that will be touched + * @value: the values of the bits to be written. Only the bits in the mask + * will be written. + * + * Return: 0 on success, < 0 in case of error. + */ +int set_pgste_bits(struct mm_struct *mm, unsigned long hva, + unsigned long bits, unsigned long value) +{ + spinlock_t *ptl; + pgste_t new; + pte_t *ptep; + + ptep = get_locked_pte(mm, hva, &ptl); + if (unlikely(!ptep)) + return -EFAULT; + new = pgste_get_lock(ptep); + + pgste_val(new) &= ~bits; + pgste_val(new) |= value & bits; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(ptep, ptl); + return 0; +} +EXPORT_SYMBOL(set_pgste_bits); + +/** + * get_pgste - get the current PGSTE for the given address. + * @mm: the memory context. It must have PGSTEs, no check is performed here! + * @hva: the host virtual address of the page whose PGSTE is to be processed + * @pgstep: will be written with the current PGSTE for the given address. + * + * Return: 0 on success, < 0 in case of error. + */ +int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) +{ + spinlock_t *ptl; + pte_t *ptep; + + ptep = get_locked_pte(mm, hva, &ptl); + if (unlikely(!ptep)) + return -EFAULT; + *pgstep = pgste_val(pgste_get(ptep)); + pte_unmap_unlock(ptep, ptl); + return 0; +} +EXPORT_SYMBOL(get_pgste); #endif diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 60d38993f232..c33c94b4be60 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -17,6 +17,7 @@ #include <asm/setup.h> #include <asm/tlbflush.h> #include <asm/sections.h> +#include <asm/set_memory.h> static DEFINE_MUTEX(vmem_mutex); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 4ecf6d687509..6e97a2e3fd8d 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -24,6 +24,7 @@ #include <linux/bpf.h> #include <asm/cacheflush.h> #include <asm/dis.h> +#include <asm/set_memory.h> #include "bpf_jit.h" int bpf_jit_enable __read_mostly; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 364b9d824be3..8051df109db3 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -60,16 +60,8 @@ static DEFINE_SPINLOCK(zpci_domain_lock); static struct airq_iv *zpci_aisb_iv; static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES]; -/* Adapter interrupt definitions */ -static void zpci_irq_handler(struct airq_struct *airq); - -static struct airq_struct zpci_airq = { - .handler = zpci_irq_handler, - .isc = PCI_ISC, -}; - #define ZPCI_IOMAP_ENTRIES \ - min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT), \ + min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2), \ ZPCI_IOMAP_MAX_ENTRIES) static DEFINE_SPINLOCK(zpci_iomap_lock); @@ -214,8 +206,6 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev) return rc; } -#define ZPCI_PCIAS_CFGSPC 15 - static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) { u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len); @@ -507,6 +497,11 @@ static void zpci_unmap_resources(struct pci_dev *pdev) } } +static struct airq_struct zpci_airq = { + .handler = zpci_irq_handler, + .isc = PCI_ISC, +}; + static int __init zpci_irq_init(void) { int rc; @@ -871,11 +866,6 @@ int zpci_report_error(struct pci_dev *pdev, } EXPORT_SYMBOL(zpci_report_error); -static inline int barsize(u8 size) -{ - return (size) ? (1 << size) >> 10 : 0; -} - static int zpci_mem_init(void) { BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) || diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 0cf802de52a1..be63fbd699fd 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -82,6 +82,7 @@ static struct facility_def facility_defs[] = { 78, /* enhanced-DAT 2 */ 130, /* instruction-execution-protection */ 131, /* enhanced-SOP 2 and side-effect */ + 146, /* msa extension 8 */ -1 /* END */ } }, |