diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-26 21:22:51 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-26 21:22:51 +0200 |
commit | 015cd867e566e3a27b5e8062eb24eeaa4d77297f (patch) | |
tree | d96c90119b3c454b5c5cfb07f2409a87bbd29754 /arch/s390 | |
parent | Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm (diff) | |
parent | s390/pci: Delete an unnecessary check before the function call "pci_dev_put" (diff) | |
download | linux-015cd867e566e3a27b5e8062eb24eeaa4d77297f.tar.xz linux-015cd867e566e3a27b5e8062eb24eeaa4d77297f.zip |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Martin Schwidefsky:
"There are a couple of new things for s390 with this merge request:
- a new scheduling domain "drawer" is added to reflect the unusual
topology found on z13 machines. Performance tests showed up to 8
percent gain with the additional domain.
- the new crc-32 checksum crypto module uses the vector-galois-field
multiply and sum SIMD instruction to speed up crc-32 and crc-32c.
- proper __ro_after_init support, this requires RO_AFTER_INIT_DATA in
the generic vmlinux.lds linker script definitions.
- kcov instrumentation support. A prerequisite for that is the
inline assembly basic block cleanup, which is the reason for the
net/iucv/iucv.c change.
- support for 2GB pages is added to the hugetlbfs backend.
Then there are two removals:
- the oprofile hardware sampling support is dead code and is removed.
The oprofile user space uses the perf interface nowadays.
- the ETR clock synchronization is removed, this has been superseeded
be the STP clock synchronization. And it always has been
"interesting" code..
And the usual bug fixes and cleanups"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (82 commits)
s390/pci: Delete an unnecessary check before the function call "pci_dev_put"
s390/smp: clean up a condition
s390/cio/chp : Remove deprecated create_singlethread_workqueue
s390/chsc: improve channel path descriptor determination
s390/chsc: sanitize fmt check for chp_desc determination
s390/cio: make fmt1 channel path descriptor optional
s390/chsc: fix ioctl CHSC_INFO_CU command
s390/cio/device_ops: fix kernel doc
s390/cio: allow to reset channel measurement block
s390/console: Make preferred console handling more consistent
s390/mm: fix gmap tlb flush issues
s390/mm: add support for 2GB hugepages
s390: have unique symbol for __switch_to address
s390/cpuinfo: show maximum thread id
s390/ptrace: clarify bits in the per_struct
s390: stack address vs thread_info
s390: remove pointless load within __switch_to
s390: enable kcov support
s390/cpumf: use basic block for ecctr inline assembly
s390/hypfs: use basic block for diag inline assembly
...
Diffstat (limited to 'arch/s390')
87 files changed, 2550 insertions, 3830 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a8c259059adf..9e607bf2d640 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -72,6 +72,7 @@ config S390 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_KCOV select ARCH_HAS_SG_CHAIN select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK @@ -163,6 +164,7 @@ config S390 select NO_BOOTMEM select OLD_SIGACTION select OLD_SIGSUSPEND3 + select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select TTY select VIRT_CPU_ACCOUNTING @@ -477,6 +479,9 @@ config SCHED_MC config SCHED_BOOK def_bool n +config SCHED_DRAWER + def_bool n + config SCHED_TOPOLOGY def_bool y prompt "Topology scheduler support" @@ -484,6 +489,7 @@ config SCHED_TOPOLOGY select SCHED_SMT select SCHED_MC select SCHED_BOOK + select SCHED_DRAWER help Topology scheduler support improves the CPU scheduler's decision making when dealing with machines that have multi-threading, @@ -605,16 +611,6 @@ config PCI_NR_FUNCTIONS This allows you to specify the maximum number of PCI functions which this kernel will support. -config PCI_NR_MSI - int "Maximum number of MSI interrupts (64-32768)" - range 64 32768 - default "256" - help - This defines the number of virtual interrupts the kernel will - provide for MSI interrupts. If you configure your system to have - too few drivers will fail to allocate MSI interrupts for all - PCI devices. - source "drivers/pci/Kconfig" endif # PCI diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 1dd210347e12..98ec652cc332 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -4,6 +4,8 @@ # create a compressed vmlinux image from the original vmlinux # +KCOV_INSTRUMENT := n + targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 targets += misc.o piggy.o sizes.h head.o diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index d5ec71b2ed02..889ea3450210 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -678,6 +678,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f46a35115d2d..1bcfd764910a 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -616,6 +616,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index ba0f2a58b8cd..13ff090139c8 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -615,6 +615,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 7f0b7cda6259..d1033de4c4ee 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -9,3 +9,6 @@ obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o +obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o + +crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c new file mode 100644 index 000000000000..577ae1d4ae89 --- /dev/null +++ b/arch/s390/crypto/crc32-vx.c @@ -0,0 +1,310 @@ +/* + * Crypto-API module for CRC-32 algorithms implemented with the + * z/Architecture Vector Extension Facility. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ +#define KMSG_COMPONENT "crc32-vx" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/module.h> +#include <linux/cpufeature.h> +#include <linux/crc32.h> +#include <crypto/internal/hash.h> +#include <asm/fpu/api.h> + + +#define CRC32_BLOCK_SIZE 1 +#define CRC32_DIGEST_SIZE 4 + +#define VX_MIN_LEN 64 +#define VX_ALIGNMENT 16L +#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) + +struct crc_ctx { + u32 key; +}; + +struct crc_desc_ctx { + u32 crc; +}; + +/* Prototypes for functions in assembly files */ +u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); +u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size); +u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); + +/* + * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension + * + * Creates a function to perform a particular CRC-32 computation. Depending + * on the message buffer, the hardware-accelerated or software implementation + * is used. Note that the message buffer is aligned to improve fetch + * operations of VECTOR LOAD MULTIPLE instructions. + * + */ +#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ + static u32 __pure ___fname(u32 crc, \ + unsigned char const *data, size_t datalen) \ + { \ + struct kernel_fpu vxstate; \ + unsigned long prealign, aligned, remaining; \ + \ + if ((unsigned long)data & VX_ALIGN_MASK) { \ + prealign = VX_ALIGNMENT - \ + ((unsigned long)data & VX_ALIGN_MASK); \ + datalen -= prealign; \ + crc = ___crc32_sw(crc, data, prealign); \ + data = (void *)((unsigned long)data + prealign); \ + } \ + \ + if (datalen < VX_MIN_LEN) \ + return ___crc32_sw(crc, data, datalen); \ + \ + aligned = datalen & ~VX_ALIGN_MASK; \ + remaining = datalen & VX_ALIGN_MASK; \ + \ + kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ + crc = ___crc32_vx(crc, data, aligned); \ + kernel_fpu_end(&vxstate); \ + \ + if (remaining) \ + crc = ___crc32_sw(crc, data + aligned, remaining); \ + \ + return crc; \ + } + +DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le) +DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be) +DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le) + + +static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm) +{ + struct crc_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = 0; + return 0; +} + +static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm) +{ + struct crc_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = ~0; + return 0; +} + +static int crc32_vx_init(struct shash_desc *desc) +{ + struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm); + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = mctx->key; + return 0; +} + +static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, + unsigned int newkeylen) +{ + struct crc_ctx *mctx = crypto_shash_ctx(tfm); + + if (newkeylen != sizeof(mctx->key)) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + mctx->key = le32_to_cpu(*(__le32 *)newkey); + return 0; +} + +static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, + unsigned int newkeylen) +{ + struct crc_ctx *mctx = crypto_shash_ctx(tfm); + + if (newkeylen != sizeof(mctx->key)) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + mctx->key = be32_to_cpu(*(__be32 *)newkey); + return 0; +} + +static int crc32le_vx_final(struct shash_desc *desc, u8 *out) +{ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__le32 *)out = cpu_to_le32p(&ctx->crc); + return 0; +} + +static int crc32be_vx_final(struct shash_desc *desc, u8 *out) +{ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__be32 *)out = cpu_to_be32p(&ctx->crc); + return 0; +} + +static int crc32c_vx_final(struct shash_desc *desc, u8 *out) +{ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + /* + * Perform a final XOR with 0xFFFFFFFF to be in sync + * with the generic crc32c shash implementation. + */ + *(__le32 *)out = ~cpu_to_le32p(&ctx->crc); + return 0; +} + +static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len, + u8 *out) +{ + *(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len)); + return 0; +} + +static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len, + u8 *out) +{ + *(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len)); + return 0; +} + +static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len, + u8 *out) +{ + /* + * Perform a final XOR with 0xFFFFFFFF to be in sync + * with the generic crc32c shash implementation. + */ + *(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len)); + return 0; +} + + +#define CRC32_VX_FINUP(alg, func) \ + static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \ + unsigned int datalen, u8 *out) \ + { \ + return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \ + data, datalen, out); \ + } + +CRC32_VX_FINUP(crc32le, crc32_le_vx) +CRC32_VX_FINUP(crc32be, crc32_be_vx) +CRC32_VX_FINUP(crc32c, crc32c_le_vx) + +#define CRC32_VX_DIGEST(alg, func) \ + static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \ + unsigned int len, u8 *out) \ + { \ + return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \ + data, len, out); \ + } + +CRC32_VX_DIGEST(crc32le, crc32_le_vx) +CRC32_VX_DIGEST(crc32be, crc32_be_vx) +CRC32_VX_DIGEST(crc32c, crc32c_le_vx) + +#define CRC32_VX_UPDATE(alg, func) \ + static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \ + unsigned int datalen) \ + { \ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); \ + ctx->crc = func(ctx->crc, data, datalen); \ + return 0; \ + } + +CRC32_VX_UPDATE(crc32le, crc32_le_vx) +CRC32_VX_UPDATE(crc32be, crc32_be_vx) +CRC32_VX_UPDATE(crc32c, crc32c_le_vx) + + +static struct shash_alg crc32_vx_algs[] = { + /* CRC-32 LE */ + { + .init = crc32_vx_init, + .setkey = crc32_vx_setkey, + .update = crc32le_vx_update, + .final = crc32le_vx_final, + .finup = crc32le_vx_finup, + .digest = crc32le_vx_digest, + .descsize = sizeof(struct crc_desc_ctx), + .digestsize = CRC32_DIGEST_SIZE, + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-vx", + .cra_priority = 200, + .cra_blocksize = CRC32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crc_ctx), + .cra_module = THIS_MODULE, + .cra_init = crc32_vx_cra_init_zero, + }, + }, + /* CRC-32 BE */ + { + .init = crc32_vx_init, + .setkey = crc32be_vx_setkey, + .update = crc32be_vx_update, + .final = crc32be_vx_final, + .finup = crc32be_vx_finup, + .digest = crc32be_vx_digest, + .descsize = sizeof(struct crc_desc_ctx), + .digestsize = CRC32_DIGEST_SIZE, + .base = { + .cra_name = "crc32be", + .cra_driver_name = "crc32be-vx", + .cra_priority = 200, + .cra_blocksize = CRC32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crc_ctx), + .cra_module = THIS_MODULE, + .cra_init = crc32_vx_cra_init_zero, + }, + }, + /* CRC-32C LE */ + { + .init = crc32_vx_init, + .setkey = crc32_vx_setkey, + .update = crc32c_vx_update, + .final = crc32c_vx_final, + .finup = crc32c_vx_finup, + .digest = crc32c_vx_digest, + .descsize = sizeof(struct crc_desc_ctx), + .digestsize = CRC32_DIGEST_SIZE, + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-vx", + .cra_priority = 200, + .cra_blocksize = CRC32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crc_ctx), + .cra_module = THIS_MODULE, + .cra_init = crc32_vx_cra_init_invert, + }, + }, +}; + + +static int __init crc_vx_mod_init(void) +{ + return crypto_register_shashes(crc32_vx_algs, + ARRAY_SIZE(crc32_vx_algs)); +} + +static void __exit crc_vx_mod_exit(void) +{ + crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs)); +} + +module_cpu_feature_match(VXRS, crc_vx_mod_init); +module_exit(crc_vx_mod_exit); + +MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>"); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS_CRYPTO("crc32"); +MODULE_ALIAS_CRYPTO("crc32-vx"); +MODULE_ALIAS_CRYPTO("crc32c"); +MODULE_ALIAS_CRYPTO("crc32c-vx"); diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S new file mode 100644 index 000000000000..8013989cd2e5 --- /dev/null +++ b/arch/s390/crypto/crc32be-vx.S @@ -0,0 +1,207 @@ +/* + * Hardware-accelerated CRC-32 variants for Linux on z Systems + * + * Use the z/Architecture Vector Extension Facility to accelerate the + * computing of CRC-32 checksums. + * + * This CRC-32 implementation algorithm processes the most-significant + * bit first (BE). + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ + +#include <linux/linkage.h> +#include <asm/vx-insn.h> + +/* Vector register range containing CRC-32 constants */ +#define CONST_R1R2 %v9 +#define CONST_R3R4 %v10 +#define CONST_R5 %v11 +#define CONST_R6 %v12 +#define CONST_RU_POLY %v13 +#define CONST_CRC_POLY %v14 + +.data +.align 8 + +/* + * The CRC-32 constant block contains reduction constants to fold and + * process particular chunks of the input data stream in parallel. + * + * For the CRC-32 variants, the constants are precomputed according to + * these defintions: + * + * R1 = x4*128+64 mod P(x) + * R2 = x4*128 mod P(x) + * R3 = x128+64 mod P(x) + * R4 = x128 mod P(x) + * R5 = x96 mod P(x) + * R6 = x64 mod P(x) + * + * Barret reduction constant, u, is defined as floor(x**64 / P(x)). + * + * where P(x) is the polynomial in the normal domain and the P'(x) is the + * polynomial in the reversed (bitreflected) domain. + * + * Note that the constant definitions below are extended in order to compute + * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction. + * The righmost doubleword can be 0 to prevent contribution to the result or + * can be multiplied by 1 to perform an XOR without the need for a separate + * VECTOR EXCLUSIVE OR instruction. + * + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: + * + * P(x) = 0x04C11DB7 + * P'(x) = 0xEDB88320 + */ + +.Lconstants_CRC_32_BE: + .quad 0x08833794c, 0x0e6228b11 # R1, R2 + .quad 0x0c5b9cd4c, 0x0e8a45605 # R3, R4 + .quad 0x0f200aa66, 1 << 32 # R5, x32 + .quad 0x0490d678d, 1 # R6, 1 + .quad 0x104d101df, 0 # u + .quad 0x104C11DB7, 0 # P(x) + +.previous + +.text +/* + * The CRC-32 function(s) use these calling conventions: + * + * Parameters: + * + * %r2: Initial CRC value, typically ~0; and final CRC (return) value. + * %r3: Input buffer pointer, performance might be improved if the + * buffer is on a doubleword boundary. + * %r4: Length of the buffer, must be 64 bytes or greater. + * + * Register usage: + * + * %r5: CRC-32 constant pool base pointer. + * V0: Initial CRC value and intermediate constants and results. + * V1..V4: Data for CRC computation. + * V5..V8: Next data chunks that are fetched from the input buffer. + * + * V9..V14: CRC-32 constants. + */ +ENTRY(crc32_be_vgfm_16) + /* Load CRC-32 constants */ + larl %r5,.Lconstants_CRC_32_BE + VLM CONST_R1R2,CONST_CRC_POLY,0,%r5 + + /* Load the initial CRC value into the leftmost word of V0. */ + VZERO %v0 + VLVGF %v0,%r2,0 + + /* Load a 64-byte data chunk and XOR with CRC */ + VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */ + VX %v1,%v0,%v1 /* V1 ^= CRC */ + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + /* Check remaining buffer size and jump to proper folding method */ + cghi %r4,64 + jl .Lless_than_64bytes + +.Lfold_64bytes_loop: + /* Load the next 64-byte data chunk into V5 to V8 */ + VLM %v5,%v8,0,%r3 + + /* + * Perform a GF(2) multiplication of the doublewords in V1 with + * the reduction constants in V0. The intermediate result is + * then folded (accumulated) with the next data chunk in V5 and + * stored in V1. Repeat this step for the register contents + * in V2, V3, and V4 respectively. + */ + VGFMAG %v1,CONST_R1R2,%v1,%v5 + VGFMAG %v2,CONST_R1R2,%v2,%v6 + VGFMAG %v3,CONST_R1R2,%v3,%v7 + VGFMAG %v4,CONST_R1R2,%v4,%v8 + + /* Adjust buffer pointer and length for next loop */ + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + cghi %r4,64 + jnl .Lfold_64bytes_loop + +.Lless_than_64bytes: + /* Fold V1 to V4 into a single 128-bit value in V1 */ + VGFMAG %v1,CONST_R3R4,%v1,%v2 + VGFMAG %v1,CONST_R3R4,%v1,%v3 + VGFMAG %v1,CONST_R3R4,%v1,%v4 + + /* Check whether to continue with 64-bit folding */ + cghi %r4,16 + jl .Lfinal_fold + +.Lfold_16bytes_loop: + + VL %v2,0,,%r3 /* Load next data chunk */ + VGFMAG %v1,CONST_R3R4,%v1,%v2 /* Fold next data chunk */ + + /* Adjust buffer pointer and size for folding next data chunk */ + aghi %r3,16 + aghi %r4,-16 + + /* Process remaining data chunks */ + cghi %r4,16 + jnl .Lfold_16bytes_loop + +.Lfinal_fold: + /* + * The R5 constant is used to fold a 128-bit value into an 96-bit value + * that is XORed with the next 96-bit input data chunk. To use a single + * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to + * form an intermediate 96-bit value (with appended zeros) which is then + * XORed with the intermediate reduction result. + */ + VGFMG %v1,CONST_R5,%v1 + + /* + * Further reduce the remaining 96-bit value to a 64-bit value using a + * single VGFMG, the rightmost doubleword is multiplied with 0x1. The + * intermediate result is then XORed with the product of the leftmost + * doubleword with R6. The result is a 64-bit value and is subject to + * the Barret reduction. + */ + VGFMG %v1,CONST_R6,%v1 + + /* + * The input values to the Barret reduction are the degree-63 polynomial + * in V1 (R(x)), degree-32 generator polynomial, and the reduction + * constant u. The Barret reduction result is the CRC value of R(x) mod + * P(x). + * + * The Barret reduction algorithm is defined as: + * + * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u + * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) + * 3. C(x) = R(x) XOR T2(x) mod x^32 + * + * Note: To compensate the division by x^32, use the vector unpack + * instruction to move the leftmost word into the leftmost doubleword + * of the vector register. The rightmost doubleword is multiplied + * with zero to not contribute to the intermedate results. + */ + + /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ + VUPLLF %v2,%v1 + VGFMG %v2,CONST_RU_POLY,%v2 + + /* + * Compute the GF(2) product of the CRC polynomial in VO with T1(x) in + * V2 and XOR the intermediate result, T2(x), with the value in V1. + * The final result is in the rightmost word of V2. + */ + VUPLLF %v2,%v2 + VGFMAG %v2,CONST_CRC_POLY,%v2,%v1 + +.Ldone: + VLGVF %r2,%v2,3 + br %r14 + +.previous diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S new file mode 100644 index 000000000000..17f2504c2633 --- /dev/null +++ b/arch/s390/crypto/crc32le-vx.S @@ -0,0 +1,268 @@ +/* + * Hardware-accelerated CRC-32 variants for Linux on z Systems + * + * Use the z/Architecture Vector Extension Facility to accelerate the + * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet + * and Castagnoli. + * + * This CRC-32 implementation algorithm is bitreflected and processes + * the least-significant bit first (Little-Endian). + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ + +#include <linux/linkage.h> +#include <asm/vx-insn.h> + +/* Vector register range containing CRC-32 constants */ +#define CONST_PERM_LE2BE %v9 +#define CONST_R2R1 %v10 +#define CONST_R4R3 %v11 +#define CONST_R5 %v12 +#define CONST_RU_POLY %v13 +#define CONST_CRC_POLY %v14 + +.data +.align 8 + +/* + * The CRC-32 constant block contains reduction constants to fold and + * process particular chunks of the input data stream in parallel. + * + * For the CRC-32 variants, the constants are precomputed according to + * these definitions: + * + * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1 + * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1 + * R3 = [(x128+32 mod P'(x) << 32)]' << 1 + * R4 = [(x128-32 mod P'(x) << 32)]' << 1 + * R5 = [(x64 mod P'(x) << 32)]' << 1 + * R6 = [(x32 mod P'(x) << 32)]' << 1 + * + * The bitreflected Barret reduction constant, u', is defined as + * the bit reversal of floor(x**64 / P(x)). + * + * where P(x) is the polynomial in the normal domain and the P'(x) is the + * polynomial in the reversed (bitreflected) domain. + * + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: + * + * P(x) = 0x04C11DB7 + * P'(x) = 0xEDB88320 + * + * CRC-32C (Castagnoli) polynomials: + * + * P(x) = 0x1EDC6F41 + * P'(x) = 0x82F63B78 + */ + +.Lconstants_CRC_32_LE: + .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask + .quad 0x1c6e41596, 0x154442bd4 # R2, R1 + .quad 0x0ccaa009e, 0x1751997d0 # R4, R3 + .octa 0x163cd6124 # R5 + .octa 0x1F7011641 # u' + .octa 0x1DB710641 # P'(x) << 1 + +.Lconstants_CRC_32C_LE: + .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask + .quad 0x09e4addf8, 0x740eef02 # R2, R1 + .quad 0x14cd00bd6, 0xf20c0dfe # R4, R3 + .octa 0x0dd45aab8 # R5 + .octa 0x0dea713f1 # u' + .octa 0x105ec76f0 # P'(x) << 1 + +.previous + + +.text + +/* + * The CRC-32 functions use these calling conventions: + * + * Parameters: + * + * %r2: Initial CRC value, typically ~0; and final CRC (return) value. + * %r3: Input buffer pointer, performance might be improved if the + * buffer is on a doubleword boundary. + * %r4: Length of the buffer, must be 64 bytes or greater. + * + * Register usage: + * + * %r5: CRC-32 constant pool base pointer. + * V0: Initial CRC value and intermediate constants and results. + * V1..V4: Data for CRC computation. + * V5..V8: Next data chunks that are fetched from the input buffer. + * V9: Constant for BE->LE conversion and shift operations + * + * V10..V14: CRC-32 constants. + */ + +ENTRY(crc32_le_vgfm_16) + larl %r5,.Lconstants_CRC_32_LE + j crc32_le_vgfm_generic + +ENTRY(crc32c_le_vgfm_16) + larl %r5,.Lconstants_CRC_32C_LE + j crc32_le_vgfm_generic + + +crc32_le_vgfm_generic: + /* Load CRC-32 constants */ + VLM CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5 + + /* + * Load the initial CRC value. + * + * The CRC value is loaded into the rightmost word of the + * vector register and is later XORed with the LSB portion + * of the loaded input data. + */ + VZERO %v0 /* Clear V0 */ + VLVGF %v0,%r2,3 /* Load CRC into rightmost word */ + + /* Load a 64-byte data chunk and XOR with CRC */ + VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */ + VPERM %v1,%v1,%v1,CONST_PERM_LE2BE + VPERM %v2,%v2,%v2,CONST_PERM_LE2BE + VPERM %v3,%v3,%v3,CONST_PERM_LE2BE + VPERM %v4,%v4,%v4,CONST_PERM_LE2BE + + VX %v1,%v0,%v1 /* V1 ^= CRC */ + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + cghi %r4,64 + jl .Lless_than_64bytes + +.Lfold_64bytes_loop: + /* Load the next 64-byte data chunk into V5 to V8 */ + VLM %v5,%v8,0,%r3 + VPERM %v5,%v5,%v5,CONST_PERM_LE2BE + VPERM %v6,%v6,%v6,CONST_PERM_LE2BE + VPERM %v7,%v7,%v7,CONST_PERM_LE2BE + VPERM %v8,%v8,%v8,CONST_PERM_LE2BE + + /* + * Perform a GF(2) multiplication of the doublewords in V1 with + * the R1 and R2 reduction constants in V0. The intermediate result + * is then folded (accumulated) with the next data chunk in V5 and + * stored in V1. Repeat this step for the register contents + * in V2, V3, and V4 respectively. + */ + VGFMAG %v1,CONST_R2R1,%v1,%v5 + VGFMAG %v2,CONST_R2R1,%v2,%v6 + VGFMAG %v3,CONST_R2R1,%v3,%v7 + VGFMAG %v4,CONST_R2R1,%v4,%v8 + + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + cghi %r4,64 + jnl .Lfold_64bytes_loop + +.Lless_than_64bytes: + /* + * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 + * and R4 and accumulating the next 128-bit chunk until a single 128-bit + * value remains. + */ + VGFMAG %v1,CONST_R4R3,%v1,%v2 + VGFMAG %v1,CONST_R4R3,%v1,%v3 + VGFMAG %v1,CONST_R4R3,%v1,%v4 + + cghi %r4,16 + jl .Lfinal_fold + +.Lfold_16bytes_loop: + + VL %v2,0,,%r3 /* Load next data chunk */ + VPERM %v2,%v2,%v2,CONST_PERM_LE2BE + VGFMAG %v1,CONST_R4R3,%v1,%v2 /* Fold next data chunk */ + + aghi %r3,16 + aghi %r4,-16 + + cghi %r4,16 + jnl .Lfold_16bytes_loop + +.Lfinal_fold: + /* + * Set up a vector register for byte shifts. The shift value must + * be loaded in bits 1-4 in byte element 7 of a vector register. + * Shift by 8 bytes: 0x40 + * Shift by 4 bytes: 0x20 + */ + VLEIB %v9,0x40,7 + + /* + * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes + * to move R4 into the rightmost doubleword and set the leftmost + * doubleword to 0x1. + */ + VSRLB %v0,CONST_R4R3,%v9 + VLEIG %v0,1,0 + + /* + * Compute GF(2) product of V1 and V0. The rightmost doubleword + * of V1 is multiplied with R4. The leftmost doubleword of V1 is + * multiplied by 0x1 and is then XORed with rightmost product. + * Implicitly, the intermediate leftmost product becomes padded + */ + VGFMG %v1,%v0,%v1 + + /* + * Now do the final 32-bit fold by multiplying the rightmost word + * in V1 with R5 and XOR the result with the remaining bits in V1. + * + * To achieve this by a single VGFMAG, right shift V1 by a word + * and store the result in V2 which is then accumulated. Use the + * vector unpack instruction to load the rightmost half of the + * doubleword into the rightmost doubleword element of V1; the other + * half is loaded in the leftmost doubleword. + * The vector register with CONST_R5 contains the R5 constant in the + * rightmost doubleword and the leftmost doubleword is zero to ignore + * the leftmost product of V1. + */ + VLEIB %v9,0x20,7 /* Shift by words */ + VSRLB %v2,%v1,%v9 /* Store remaining bits in V2 */ + VUPLLF %v1,%v1 /* Split rightmost doubleword */ + VGFMAG %v1,CONST_R5,%v1,%v2 /* V1 = (V1 * R5) XOR V2 */ + + /* + * Apply a Barret reduction to compute the final 32-bit CRC value. + * + * The input values to the Barret reduction are the degree-63 polynomial + * in V1 (R(x)), degree-32 generator polynomial, and the reduction + * constant u. The Barret reduction result is the CRC value of R(x) mod + * P(x). + * + * The Barret reduction algorithm is defined as: + * + * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u + * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) + * 3. C(x) = R(x) XOR T2(x) mod x^32 + * + * Note: The leftmost doubleword of vector register containing + * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product + * is zero and does not contribute to the final result. + */ + + /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ + VUPLLF %v2,%v1 + VGFMG %v2,CONST_RU_POLY,%v2 + + /* + * Compute the GF(2) product of the CRC polynomial with T1(x) in + * V2 and XOR the intermediate result, T2(x), with the value in V1. + * The final result is stored in word element 2 of V2. + */ + VUPLLF %v2,%v2 + VGFMAG %v2,CONST_CRC_POLY,%v2,%v1 + +.Ldone: + VLGVF %r2,%v2,2 + br %r14 + +.previous diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 3f571ea89509..ccccebeeaaf6 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -225,12 +225,16 @@ CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_ANSI_CPRNG=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_CRYPTO_USER_API_RNG=m CONFIG_ZCRYPT=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_CRC7=m # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_POWERPC is not set diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 045035796ca7..67d43a0eabb4 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -337,25 +337,27 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) /* Diagnose 204 functions */ -static inline int __diag204(unsigned long subcode, unsigned long size, void *addr) +static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr) { - register unsigned long _subcode asm("0") = subcode; + register unsigned long _subcode asm("0") = *subcode; register unsigned long _size asm("1") = size; asm volatile( " diag %2,%0,0x204\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); - if (_subcode) - return -1; + *subcode = _subcode; return _size; } static int diag204(unsigned long subcode, unsigned long size, void *addr) { diag_stat_inc(DIAG_STAT_X204); - return __diag204(subcode, size, addr); + size = __diag204(&subcode, size, addr); + if (subcode) + return -1; + return size; } /* diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 44feac38ccfc..012919d9833b 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -70,7 +70,7 @@ static int diag2fc(int size, char* query, void *addr) diag_stat_inc(DIAG_STAT_X2FC); asm volatile( " diag %0,%1,0x2fc\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : "=d" (residual_cnt), "+d" (rc) : "0" (&parm_list) : "memory"); diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h index 22da3b34c655..05219a5e0b2f 100644 --- a/arch/s390/include/asm/cache.h +++ b/arch/s390/include/asm/cache.h @@ -13,9 +13,6 @@ #define L1_CACHE_SHIFT 8 #define NET_SKB_PAD 32 -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) - -/* Read-only memory is marked before mark_rodata_ro() is called. */ -#define __ro_after_init __read_mostly +#define __read_mostly __section(.data..read_mostly) #endif diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index d1e7b0a0feeb..f7ed88cc066e 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -320,7 +320,7 @@ struct cio_iplinfo { extern int cio_get_iplinfo(struct cio_iplinfo *iplinfo); /* Function from drivers/s390/cio/chsc.c */ -int chsc_sstpc(void *page, unsigned int op, u16 ctrl); +int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); int chsc_sstpi(void *page, void *result, size_t size); #endif diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 9dd04b9e9782..03516476127b 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -169,16 +169,27 @@ static inline int lcctl(u64 ctl) } /* Extract CPU counter */ -static inline int ecctr(u64 ctr, u64 *val) +static inline int __ecctr(u64 ctr, u64 *content) { - register u64 content asm("4") = 0; + register u64 _content asm("4") = 0; int cc; asm volatile ( " .insn rre,0xb2e40000,%0,%2\n" " ipm %1\n" " srl %1,28\n" - : "=d" (content), "=d" (cc) : "d" (ctr) : "cc"); + : "=d" (_content), "=d" (cc) : "d" (ctr) : "cc"); + *content = _content; + return cc; +} + +/* Extract CPU counter */ +static inline int ecctr(u64 ctr, u64 *val) +{ + u64 content; + int cc; + + cc = __ecctr(ctr, &content); if (!cc) *val = content; return cc; diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 5fac921c1c42..86cae09e076a 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -49,7 +49,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) diag_stat_inc(DIAG_STAT_X010); asm volatile( "0: diag %0,%1,0x10\n" - "1:\n" + "1: nopr %%r7\n" EX_TABLE(0b, 1b) EX_TABLE(1b, 1b) : : "a" (start_addr), "a" (end_addr)); diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h deleted file mode 100644 index 105f90e63a0e..000000000000 --- a/arch/s390/include/asm/etr.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright IBM Corp. 2006 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#ifndef __S390_ETR_H -#define __S390_ETR_H - -/* ETR attachment control register */ -struct etr_eacr { - unsigned int e0 : 1; /* port 0 stepping control */ - unsigned int e1 : 1; /* port 1 stepping control */ - unsigned int _pad0 : 5; /* must be 00100 */ - unsigned int dp : 1; /* data port control */ - unsigned int p0 : 1; /* port 0 change recognition control */ - unsigned int p1 : 1; /* port 1 change recognition control */ - unsigned int _pad1 : 3; /* must be 000 */ - unsigned int ea : 1; /* ETR alert control */ - unsigned int es : 1; /* ETR sync check control */ - unsigned int sl : 1; /* switch to local control */ -} __attribute__ ((packed)); - -/* Port state returned by steai */ -enum etr_psc { - etr_psc_operational = 0, - etr_psc_semi_operational = 1, - etr_psc_protocol_error = 4, - etr_psc_no_symbols = 8, - etr_psc_no_signal = 12, - etr_psc_pps_mode = 13 -}; - -/* Logical port state returned by stetr */ -enum etr_lpsc { - etr_lpsc_operational_step = 0, - etr_lpsc_operational_alt = 1, - etr_lpsc_semi_operational = 2, - etr_lpsc_protocol_error = 4, - etr_lpsc_no_symbol_sync = 8, - etr_lpsc_no_signal = 12, - etr_lpsc_pps_mode = 13 -}; - -/* ETR status words */ -struct etr_esw { - struct etr_eacr eacr; /* attachment control register */ - unsigned int y : 1; /* stepping mode */ - unsigned int _pad0 : 5; /* must be 00000 */ - unsigned int p : 1; /* stepping port number */ - unsigned int q : 1; /* data port number */ - unsigned int psc0 : 4; /* port 0 state code */ - unsigned int psc1 : 4; /* port 1 state code */ -} __attribute__ ((packed)); - -/* Second level data register status word */ -struct etr_slsw { - unsigned int vv1 : 1; /* copy of validity bit data frame 1 */ - unsigned int vv2 : 1; /* copy of validity bit data frame 2 */ - unsigned int vv3 : 1; /* copy of validity bit data frame 3 */ - unsigned int vv4 : 1; /* copy of validity bit data frame 4 */ - unsigned int _pad0 : 19; /* must by all zeroes */ - unsigned int n : 1; /* EAF port number */ - unsigned int v1 : 1; /* validity bit ETR data frame 1 */ - unsigned int v2 : 1; /* validity bit ETR data frame 2 */ - unsigned int v3 : 1; /* validity bit ETR data frame 3 */ - unsigned int v4 : 1; /* validity bit ETR data frame 4 */ - unsigned int _pad1 : 4; /* must be 0000 */ -} __attribute__ ((packed)); - -/* ETR data frames */ -struct etr_edf1 { - unsigned int u : 1; /* untuned bit */ - unsigned int _pad0 : 1; /* must be 0 */ - unsigned int r : 1; /* service request bit */ - unsigned int _pad1 : 4; /* must be 0000 */ - unsigned int a : 1; /* time adjustment bit */ - unsigned int net_id : 8; /* ETR network id */ - unsigned int etr_id : 8; /* id of ETR which sends data frames */ - unsigned int etr_pn : 8; /* port number of ETR output port */ -} __attribute__ ((packed)); - -struct etr_edf2 { - unsigned int etv : 32; /* Upper 32 bits of TOD. */ -} __attribute__ ((packed)); - -struct etr_edf3 { - unsigned int rc : 8; /* failure reason code */ - unsigned int _pad0 : 3; /* must be 000 */ - unsigned int c : 1; /* ETR coupled bit */ - unsigned int tc : 4; /* ETR type code */ - unsigned int blto : 8; /* biased local time offset */ - /* (blto - 128) * 15 = minutes */ - unsigned int buo : 8; /* biased utc offset */ - /* (buo - 128) = leap seconds */ -} __attribute__ ((packed)); - -struct etr_edf4 { - unsigned int ed : 8; /* ETS device dependent data */ - unsigned int _pad0 : 1; /* must be 0 */ - unsigned int buc : 5; /* biased ut1 correction */ - /* (buc - 16) * 0.1 seconds */ - unsigned int em : 6; /* ETS error magnitude */ - unsigned int dc : 6; /* ETS drift code */ - unsigned int sc : 6; /* ETS steering code */ -} __attribute__ ((packed)); - -/* - * ETR attachment information block, two formats - * format 1 has 4 reserved words with a size of 64 bytes - * format 2 has 16 reserved words with a size of 96 bytes - */ -struct etr_aib { - struct etr_esw esw; - struct etr_slsw slsw; - unsigned long long tsp; - struct etr_edf1 edf1; - struct etr_edf2 edf2; - struct etr_edf3 edf3; - struct etr_edf4 edf4; - unsigned int reserved[16]; -} __attribute__ ((packed,aligned(8))); - -/* ETR interruption parameter */ -struct etr_irq_parm { - unsigned int _pad0 : 8; - unsigned int pc0 : 1; /* port 0 state change */ - unsigned int pc1 : 1; /* port 1 state change */ - unsigned int _pad1 : 3; - unsigned int eai : 1; /* ETR alert indication */ - unsigned int _pad2 : 18; -} __attribute__ ((packed)); - -/* Query TOD offset result */ -struct etr_ptff_qto { - unsigned long long physical_clock; - unsigned long long tod_offset; - unsigned long long logical_tod_offset; - unsigned long long tod_epoch_difference; -} __attribute__ ((packed)); - -/* Inline assembly helper functions */ -static inline int etr_setr(struct etr_eacr *ctrl) -{ - int rc = -EOPNOTSUPP; - - asm volatile( - " .insn s,0xb2160000,%1\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "Q" (*ctrl)); - return rc; -} - -/* Stores a format 1 aib with 64 bytes */ -static inline int etr_stetr(struct etr_aib *aib) -{ - int rc = -EOPNOTSUPP; - - asm volatile( - " .insn s,0xb2170000,%1\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "Q" (*aib)); - return rc; -} - -/* Stores a format 2 aib with 96 bytes for specified port */ -static inline int etr_steai(struct etr_aib *aib, unsigned int func) -{ - register unsigned int reg0 asm("0") = func; - int rc = -EOPNOTSUPP; - - asm volatile( - " .insn s,0xb2b30000,%1\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "Q" (*aib), "d" (reg0)); - return rc; -} - -/* Function codes for the steai instruction. */ -#define ETR_STEAI_STEPPING_PORT 0x10 -#define ETR_STEAI_ALTERNATE_PORT 0x11 -#define ETR_STEAI_PORT_0 0x12 -#define ETR_STEAI_PORT_1 0x13 - -static inline int etr_ptff(void *ptff_block, unsigned int func) -{ - register unsigned int reg0 asm("0") = func; - register unsigned long reg1 asm("1") = (unsigned long) ptff_block; - int rc = -EOPNOTSUPP; - - asm volatile( - " .word 0x0104\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (rc), "=m" (ptff_block) - : "d" (reg0), "d" (reg1), "m" (ptff_block) : "cc"); - return rc; -} - -/* Function codes for the ptff instruction. */ -#define ETR_PTFF_QAF 0x00 /* query available functions */ -#define ETR_PTFF_QTO 0x01 /* query tod offset */ -#define ETR_PTFF_QSI 0x02 /* query steering information */ -#define ETR_PTFF_ATO 0x40 /* adjust tod offset */ -#define ETR_PTFF_STO 0x41 /* set tod offset */ -#define ETR_PTFF_SFS 0x42 /* set fine steering rate */ -#define ETR_PTFF_SGS 0x43 /* set gross steering rate */ - -/* Functions needed by the machine check handler */ -int etr_switch_to_local(void); -int etr_sync_check(void); -void etr_queue_work(void); - -/* notifier for syncs */ -extern struct atomic_notifier_head s390_epoch_delta_notifier; - -/* STP interruption parameter */ -struct stp_irq_parm { - unsigned int _pad0 : 14; - unsigned int tsc : 1; /* Timing status change */ - unsigned int lac : 1; /* Link availability change */ - unsigned int tcpc : 1; /* Time control parameter change */ - unsigned int _pad2 : 15; -} __attribute__ ((packed)); - -#define STP_OP_SYNC 1 -#define STP_OP_CTRL 3 - -struct stp_sstpi { - unsigned int rsvd0; - unsigned int rsvd1 : 8; - unsigned int stratum : 8; - unsigned int vbits : 16; - unsigned int leaps : 16; - unsigned int tmd : 4; - unsigned int ctn : 4; - unsigned int rsvd2 : 3; - unsigned int c : 1; - unsigned int tst : 4; - unsigned int tzo : 16; - unsigned int dsto : 16; - unsigned int ctrl : 16; - unsigned int rsvd3 : 16; - unsigned int tto; - unsigned int rsvd4; - unsigned int ctnid[3]; - unsigned int rsvd5; - unsigned int todoff[4]; - unsigned int rsvd6[48]; -} __attribute__ ((packed)); - -/* Functions needed by the machine check handler */ -int stp_sync_check(void); -int stp_island_check(void); -void stp_queue_work(void); - -#endif /* __S390_ETR_H */ diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h index 7ecb92b469b6..04cb4b4bcc5f 100644 --- a/arch/s390/include/asm/fcx.h +++ b/arch/s390/include/asm/fcx.h @@ -6,7 +6,7 @@ */ #ifndef _ASM_S390_FCX_H -#define _ASM_S390_FCX_H _ASM_S390_FCX_H +#define _ASM_S390_FCX_H #include <linux/types.h> diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index 8ae236b0f80b..6aba6fc406ad 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -1,6 +1,41 @@ /* * In-kernel FPU support functions * + * + * Consider these guidelines before using in-kernel FPU functions: + * + * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel + * use of floating-point or vector registers and instructions. + * + * 2. For kernel_fpu_begin(), specify the vector register range you want to + * use with the KERNEL_VXR_* constants. Consider these usage guidelines: + * + * a) If your function typically runs in process-context, use the lower + * half of the vector registers, for example, specify KERNEL_VXR_LOW. + * b) If your function typically runs in soft-irq or hard-irq context, + * prefer using the upper half of the vector registers, for example, + * specify KERNEL_VXR_HIGH. + * + * If you adhere to these guidelines, an interrupted process context + * does not require to save and restore vector registers because of + * disjoint register ranges. + * + * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions + * includes logic to save and restore up to 16 vector registers at once. + * + * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different + * struct kernel_fpu states. Vector registers that are in use by outer + * levels are saved and restored. You can minimize the save and restore + * effort by choosing disjoint vector register ranges. + * + * 5. To use vector floating-point instructions, specify the KERNEL_FPC + * flag to save and restore floating-point controls in addition to any + * vector register range. + * + * 6. To use floating-point registers and instructions only, specify the + * KERNEL_FPR flag. This flag triggers a save and restore of vector + * registers V0 to V15 and floating-point controls. + * * Copyright IBM Corp. 2015 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ @@ -8,6 +43,8 @@ #ifndef _ASM_S390_FPU_API_H #define _ASM_S390_FPU_API_H +#include <linux/preempt.h> + void save_fpu_regs(void); static inline int test_fp_ctl(u32 fpc) @@ -27,4 +64,42 @@ static inline int test_fp_ctl(u32 fpc) return rc; } +#define KERNEL_VXR_V0V7 1 +#define KERNEL_VXR_V8V15 2 +#define KERNEL_VXR_V16V23 4 +#define KERNEL_VXR_V24V31 8 +#define KERNEL_FPR 16 +#define KERNEL_FPC 256 + +#define KERNEL_VXR_LOW (KERNEL_VXR_V0V7|KERNEL_VXR_V8V15) +#define KERNEL_VXR_MID (KERNEL_VXR_V8V15|KERNEL_VXR_V16V23) +#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31) + +#define KERNEL_FPU_MASK (KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR) + +struct kernel_fpu; + +/* + * Note the functions below must be called with preemption disabled. + * Do not enable preemption before calling __kernel_fpu_end() to prevent + * an corruption of an existing kernel FPU state. + * + * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions. + */ +void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags); +void __kernel_fpu_end(struct kernel_fpu *state); + + +static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags) +{ + preempt_disable(); + __kernel_fpu_begin(state, flags); +} + +static inline void kernel_fpu_end(struct kernel_fpu *state) +{ + __kernel_fpu_end(state); + preempt_enable(); +} + #endif /* _ASM_S390_FPU_API_H */ diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h index fe937c9b6471..bce255ead72b 100644 --- a/arch/s390/include/asm/fpu/types.h +++ b/arch/s390/include/asm/fpu/types.h @@ -24,4 +24,14 @@ struct fpu { /* VX array structure for address operand constraints in inline assemblies */ struct vx_array { __vector128 _[__NUM_VXRS]; }; +/* In-kernel FPU state structure */ +struct kernel_fpu { + u32 mask; + u32 fpc; + union { + freg_t fprs[__NUM_FPRS]; + __vector128 vxrs[__NUM_VXRS]; + }; +}; + #endif /* _ASM_S390_FPU_TYPES_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index d9be7c0c1291..4c7fac75090e 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -41,7 +41,10 @@ static inline int prepare_hugepage_range(struct file *file, static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pte_val(*ptep) = _REGION3_ENTRY_EMPTY; + else + pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; } static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 6fc44dca193e..4da22b2f0521 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -141,11 +141,11 @@ extern void setup_ipl(void); * DIAG 308 support */ enum diag308_subcode { - DIAG308_REL_HSA = 2, - DIAG308_IPL = 3, - DIAG308_DUMP = 4, - DIAG308_SET = 5, - DIAG308_STORE = 6, + DIAG308_REL_HSA = 2, + DIAG308_LOAD_CLEAR = 3, + DIAG308_LOAD_NORMAL_DUMP = 4, + DIAG308_SET = 5, + DIAG308_STORE = 6, }; enum diag308_ipl_type { diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index f97b055de76a..70c9bce766f5 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -7,11 +7,8 @@ #define NR_IRQS_BASE 3 -#ifdef CONFIG_PCI_NR_MSI -# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI) -#else -# define NR_IRQS NR_IRQS_BASE -#endif +#define NR_IRQS NR_IRQS_BASE +#define NR_IRQS_LEGACY NR_IRQS_BASE /* External interruption codes */ #define EXT_IRQ_INTERRUPT_KEY 0x0040 diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 7f9fd5e3f1bf..9be198f5ee79 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <linux/stringify.h> #define JUMP_LABEL_NOP_SIZE 6 #define JUMP_LABEL_NOP_OFFSET 2 diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index b47ad3b642cc..591e5a5279b0 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -43,9 +43,9 @@ typedef u16 kprobe_opcode_t; #define MAX_INSN_SIZE 0x0003 #define MAX_STACK_SIZE 64 #define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \ - (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \ + (((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR))) \ ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) + : (((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR))) #define kretprobe_blacklist_size 0 diff --git a/arch/s390/include/asm/mathemu.h b/arch/s390/include/asm/mathemu.h deleted file mode 100644 index 614dfaf47f71..000000000000 --- a/arch/s390/include/asm/mathemu.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * IEEE floating point emulation. - * - * S390 version - * Copyright IBM Corp. 1999 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ - -#ifndef __MATHEMU__ -#define __MATHEMU__ - -extern int math_emu_b3(__u8 *, struct pt_regs *); -extern int math_emu_ed(__u8 *, struct pt_regs *); -extern int math_emu_ldr(__u8 *); -extern int math_emu_ler(__u8 *); -extern int math_emu_std(__u8 *, struct pt_regs *); -extern int math_emu_ld(__u8 *, struct pt_regs *); -extern int math_emu_ste(__u8 *, struct pt_regs *); -extern int math_emu_le(__u8 *, struct pt_regs *); -extern int math_emu_lfpc(__u8 *, struct pt_regs *); -extern int math_emu_stfpc(__u8 *, struct pt_regs *); -extern int math_emu_srnm(__u8 *, struct pt_regs *); - -#endif /* __MATHEMU__ */ - - - - diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 081b2ad99d73..18226437a832 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -6,7 +6,7 @@ typedef struct { cpumask_t cpu_attach_mask; - atomic_t attach_count; + atomic_t flush_count; unsigned int flush_mm; spinlock_t list_lock; struct list_head pgtable_list; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index c837b79b455d..f77c638bf397 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -19,7 +19,7 @@ static inline int init_new_context(struct task_struct *tsk, INIT_LIST_HEAD(&mm->context.pgtable_list); INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); - atomic_set(&mm->context.attach_count, 0); + atomic_set(&mm->context.flush_count, 0); mm->context.flush_mm = 0; #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste; @@ -90,15 +90,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, S390_lowcore.user_asce = next->context.asce; if (prev == next) return; - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); + cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); + cpumask_set_cpu(cpu, mm_cpumask(next)); /* Clear old ASCE by loading the kernel ASCE. */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); - atomic_inc(&next->context.attach_count); - atomic_dec(&prev->context.attach_count); - if (MACHINE_HAS_TLB_LC) - cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); + cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } #define finish_arch_post_lock_switch finish_arch_post_lock_switch @@ -110,10 +107,9 @@ static inline void finish_arch_post_lock_switch(void) load_kernel_asce(); if (mm) { preempt_disable(); - while (atomic_read(&mm->context.attach_count) >> 16) + while (atomic_read(&mm->context.flush_count)) cpu_relax(); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); if (mm->context.flush_mm) __tlb_flush_mm(mm); preempt_enable(); @@ -128,7 +124,6 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { switch_mm(prev, next, current); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); set_user_asce(next); } diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 53eacbd4f09b..b2146c4119b2 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -21,6 +21,7 @@ #define HPAGE_SIZE (1UL << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#define HUGE_MAX_HSTATE 2 #define ARCH_HAS_SETCLEAR_HUGE_PTE #define ARCH_HAS_HUGE_PTE_TYPE @@ -30,11 +31,12 @@ #include <asm/setup.h> #ifndef __ASSEMBLY__ +void __storage_key_init_range(unsigned long start, unsigned long end); + static inline void storage_key_init_range(unsigned long start, unsigned long end) { -#if PAGE_DEFAULT_KEY - __storage_key_init_range(start, end); -#endif + if (PAGE_DEFAULT_KEY) + __storage_key_init_range(start, end); } #define clear_page(page) memset((page), 0, PAGE_SIZE) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 1f7ff85c5e4c..c64c0befd3f3 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -86,16 +86,4 @@ struct sf_raw_sample { u8 padding[]; /* Padding to next multiple of 8 */ } __packed; -/* Perf hardware reserve and release functions */ -#ifdef CONFIG_PERF_EVENTS -int perf_reserve_sampling(void); -void perf_release_sampling(void); -#else /* CONFIG_PERF_EVENTS */ -static inline int perf_reserve_sampling(void) -{ - return 0; -} -static inline void perf_release_sampling(void) {} -#endif /* CONFIG_PERF_EVENTS */ - #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 18d2beb89340..ea1533e07271 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -28,12 +28,33 @@ #include <linux/mm_types.h> #include <linux/page-flags.h> #include <linux/radix-tree.h> +#include <linux/atomic.h> #include <asm/bug.h> #include <asm/page.h> -extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); +extern pgd_t swapper_pg_dir[]; extern void paging_init(void); extern void vmem_map_init(void); +pmd_t *vmem_pmd_alloc(void); +pte_t *vmem_pte_alloc(void); + +enum { + PG_DIRECT_MAP_4K = 0, + PG_DIRECT_MAP_1M, + PG_DIRECT_MAP_2G, + PG_DIRECT_MAP_MAX +}; + +extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; + +static inline void update_page_count(int level, long count) +{ + if (IS_ENABLED(CONFIG_PROC_FS)) + atomic_long_add(count, &direct_pages_count[level]); +} + +struct seq_file; +void arch_report_meminfo(struct seq_file *m); /* * The S390 doesn't have any external MMU info: the kernel page @@ -270,8 +291,23 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) #define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) -#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ -#define _REGION3_ENTRY_RO 0x200 /* page protection bit */ +#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */ +#define _REGION3_ENTRY_ORIGIN ~0x7ffUL/* region third table origin */ + +#define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */ +#define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */ +#define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */ +#define _REGION3_ENTRY_READ 0x0002 /* SW region read bit */ +#define _REGION3_ENTRY_WRITE 0x0001 /* SW region write bit */ + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */ +#else +#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */ +#endif + +#define _REGION_ENTRY_BITS 0xfffffffffffff227UL +#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL @@ -297,7 +333,8 @@ static inline int is_module_addr(void *addr) #endif /* - * Segment table entry encoding (R = read-only, I = invalid, y = young bit): + * Segment table and region3 table entry encoding + * (R = read-only, I = invalid, y = young bit): * dy..R...I...rw * prot-none, clean, old 00..1...1...00 * prot-none, clean, young 01..1...1...00 @@ -391,6 +428,33 @@ static inline int is_module_addr(void *addr) _SEGMENT_ENTRY_READ) #define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) +#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ + _SEGMENT_ENTRY_LARGE | \ + _SEGMENT_ENTRY_READ | \ + _SEGMENT_ENTRY_WRITE | \ + _SEGMENT_ENTRY_YOUNG | \ + _SEGMENT_ENTRY_DIRTY) +#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \ + _SEGMENT_ENTRY_LARGE | \ + _SEGMENT_ENTRY_READ | \ + _SEGMENT_ENTRY_YOUNG | \ + _SEGMENT_ENTRY_PROTECT) + +/* + * Region3 entry (large page) protection definitions. + */ + +#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_WRITE | \ + _REGION3_ENTRY_YOUNG | \ + _REGION3_ENTRY_DIRTY) +#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_YOUNG | \ + _REGION_ENTRY_PROTECT) static inline int mm_has_pgste(struct mm_struct *mm) { @@ -424,6 +488,53 @@ static inline int mm_use_skey(struct mm_struct *mm) return 0; } +static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new) +{ + register unsigned long reg2 asm("2") = old; + register unsigned long reg3 asm("3") = new; + unsigned long address = (unsigned long)ptr | 1; + + asm volatile( + " csp %0,%3" + : "+d" (reg2), "+m" (*ptr) + : "d" (reg3), "d" (address) + : "cc"); +} + +static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new) +{ + register unsigned long reg2 asm("2") = old; + register unsigned long reg3 asm("3") = new; + unsigned long address = (unsigned long)ptr | 1; + + asm volatile( + " .insn rre,0xb98a0000,%0,%3" + : "+d" (reg2), "+m" (*ptr) + : "d" (reg3), "d" (address) + : "cc"); +} + +#define CRDTE_DTT_PAGE 0x00UL +#define CRDTE_DTT_SEGMENT 0x10UL +#define CRDTE_DTT_REGION3 0x14UL +#define CRDTE_DTT_REGION2 0x18UL +#define CRDTE_DTT_REGION1 0x1cUL + +static inline void crdte(unsigned long old, unsigned long new, + unsigned long table, unsigned long dtt, + unsigned long address, unsigned long asce) +{ + register unsigned long reg2 asm("2") = old; + register unsigned long reg3 asm("3") = new; + register unsigned long reg4 asm("4") = table | dtt; + register unsigned long reg5 asm("5") = address; + + asm volatile(".insn rrf,0xb98f0000,%0,%2,%4,0" + : "+d" (reg2) + : "d" (reg3), "d" (reg4), "d" (reg5), "a" (asce) + : "memory", "cc"); +} + /* * pgd/pmd/pte query functions */ @@ -465,7 +576,7 @@ static inline int pud_none(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) return 0; - return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL; + return pud_val(pud) == _REGION3_ENTRY_EMPTY; } static inline int pud_large(pud_t pud) @@ -475,17 +586,35 @@ static inline int pud_large(pud_t pud) return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); } +static inline unsigned long pud_pfn(pud_t pud) +{ + unsigned long origin_mask; + + origin_mask = _REGION3_ENTRY_ORIGIN; + if (pud_large(pud)) + origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; + return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; +} + +static inline int pmd_large(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; +} + +static inline int pmd_bad(pmd_t pmd) +{ + if (pmd_large(pmd)) + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; +} + static inline int pud_bad(pud_t pud) { - /* - * With dynamic page table levels the pud can be a region table - * entry or a segment table entry. Check for the bit that are - * invalid for either table entry. - */ - unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & - ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; - return (pud_val(pud) & mask) != 0; + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + return pmd_bad(__pmd(pud_val(pud))); + if (pud_large(pud)) + return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0; + return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0; } static inline int pmd_present(pmd_t pmd) @@ -498,11 +627,6 @@ static inline int pmd_none(pmd_t pmd) return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID; } -static inline int pmd_large(pmd_t pmd) -{ - return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; -} - static inline unsigned long pmd_pfn(pmd_t pmd) { unsigned long origin_mask; @@ -513,13 +637,6 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; } -static inline int pmd_bad(pmd_t pmd) -{ - if (pmd_large(pmd)) - return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; - return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; -} - #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { @@ -963,6 +1080,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) +#define pud_page(pud) pfn_to_page(pud_pfn(pud)) /* Find an entry in the lowest level page table.. */ #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) @@ -970,20 +1088,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) -static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) -{ - /* - * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) - * Convert to segment table entry format. - */ - if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) - return pgprot_val(SEGMENT_NONE); - if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) - return pgprot_val(SEGMENT_READ); - return pgprot_val(SEGMENT_WRITE); -} - static inline pmd_t pmd_wrprotect(pmd_t pmd) { pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE; @@ -1020,6 +1124,56 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +static inline pud_t pud_wrprotect(pud_t pud) +{ + pud_val(pud) &= ~_REGION3_ENTRY_WRITE; + pud_val(pud) |= _REGION_ENTRY_PROTECT; + return pud; +} + +static inline pud_t pud_mkwrite(pud_t pud) +{ + pud_val(pud) |= _REGION3_ENTRY_WRITE; + if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY)) + return pud; + pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + return pud; +} + +static inline pud_t pud_mkclean(pud_t pud) +{ + if (pud_large(pud)) { + pud_val(pud) &= ~_REGION3_ENTRY_DIRTY; + pud_val(pud) |= _REGION_ENTRY_PROTECT; + } + return pud; +} + +static inline pud_t pud_mkdirty(pud_t pud) +{ + if (pud_large(pud)) { + pud_val(pud) |= _REGION3_ENTRY_DIRTY | + _REGION3_ENTRY_SOFT_DIRTY; + if (pud_val(pud) & _REGION3_ENTRY_WRITE) + pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + } + return pud; +} + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) +static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) +{ + /* + * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) + * Convert to segment table entry format. + */ + if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) + return pgprot_val(SEGMENT_NONE); + if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) + return pgprot_val(SEGMENT_READ); + return pgprot_val(SEGMENT_WRITE); +} + static inline pmd_t pmd_mkyoung(pmd_t pmd) { if (pmd_large(pmd)) { @@ -1068,15 +1222,8 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) static inline void __pmdp_csp(pmd_t *pmdp) { - register unsigned long reg2 asm("2") = pmd_val(*pmdp); - register unsigned long reg3 asm("3") = pmd_val(*pmdp) | - _SEGMENT_ENTRY_INVALID; - register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; - - asm volatile( - " csp %1,%3" - : "=m" (*pmdp) - : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); + csp((unsigned int *)pmdp + 1, pmd_val(*pmdp), + pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); } static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) @@ -1091,6 +1238,19 @@ static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) : "cc" ); } +static inline void __pudp_idte(unsigned long address, pud_t *pudp) +{ + unsigned long r3o; + + r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); + r3o |= _ASCE_TYPE_REGION3; + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,0" + : "=m" (*pudp) + : "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK)) + : "cc"); +} + static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) { unsigned long sto; @@ -1103,8 +1263,22 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) : "cc" ); } +static inline void __pudp_idte_local(unsigned long address, pud_t *pudp) +{ + unsigned long r3o; + + r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); + r3o |= _ASCE_TYPE_REGION3; + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,1" + : "=m" (*pudp) + : "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK)) + : "cc"); +} + pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t); +pud_t pudp_xchg_direct(struct mm_struct *, unsigned long, pud_t *, pud_t); #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 9d4d311d7e52..09529202ea77 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -77,7 +77,10 @@ static inline void get_cpu_id(struct cpuid *ptr) asm volatile("stidp %0" : "=Q" (*ptr)); } -extern void s390_adjust_jiffies(void); +void s390_adjust_jiffies(void); +void s390_update_cpu_mhz(void); +void cpu_detect_mhz_feature(void); + extern const struct seq_operations cpuinfo_op; extern int sysctl_ieee_emulation_warnings; extern void execve_tail(void); @@ -233,6 +236,18 @@ void cpu_relax(void); #define cpu_relax_lowlatency() barrier() +#define ECAG_CACHE_ATTRIBUTE 0 +#define ECAG_CPU_ATTRIBUTE 1 + +static inline unsigned long __ecag(unsigned int asi, unsigned char parm) +{ + unsigned long val; + + asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ + : "=d" (val) : "a" (asi << 8 | parm)); + return val; +} + static inline void psw_set_key(unsigned int key) { asm volatile("spka 0(%0)" : : "d" (key)); diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index fbd9116eb17b..5ce29fe100ba 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,5 +4,6 @@ #include <asm-generic/sections.h> extern char _eshared[], _ehead[]; +extern char __start_ro_after_init[], __end_ro_after_init[]; #endif diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index c0f0efbb6ab5..5e8d57e1cc5e 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -86,9 +86,13 @@ extern char vmpoff_cmd[]; #define CONSOLE_IS_SCLP (console_mode == 1) #define CONSOLE_IS_3215 (console_mode == 2) #define CONSOLE_IS_3270 (console_mode == 3) +#define CONSOLE_IS_VT220 (console_mode == 4) +#define CONSOLE_IS_HVC (console_mode == 5) #define SET_CONSOLE_SCLP do { console_mode = 1; } while (0) #define SET_CONSOLE_3215 do { console_mode = 2; } while (0) #define SET_CONSOLE_3270 do { console_mode = 3; } while (0) +#define SET_CONSOLE_VT220 do { console_mode = 4; } while (0) +#define SET_CONSOLE_HVC do { console_mode = 5; } while (0) #define NSS_NAME_SIZE 8 extern char kernel_nss_name[]; diff --git a/arch/s390/include/asm/sfp-machine.h b/arch/s390/include/asm/sfp-machine.h deleted file mode 100644 index 4e16aede4b06..000000000000 --- a/arch/s390/include/asm/sfp-machine.h +++ /dev/null @@ -1,142 +0,0 @@ -/* Machine-dependent software floating-point definitions. - S/390 kernel version. - Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Richard Henderson (rth@cygnus.com), - Jakub Jelinek (jj@ultra.linux.cz), - David S. Miller (davem@redhat.com) and - Peter Maydell (pmaydell@chiark.greenend.org.uk). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If - not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#ifndef _SFP_MACHINE_H -#define _SFP_MACHINE_H - - -#define _FP_W_TYPE_SIZE 32 -#define _FP_W_TYPE unsigned int -#define _FP_WS_TYPE signed int -#define _FP_I_TYPE int - -#define _FP_MUL_MEAT_S(R,X,Y) \ - _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) -#define _FP_MUL_MEAT_D(R,X,Y) \ - _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) -#define _FP_MUL_MEAT_Q(R,X,Y) \ - _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) - -#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y) -#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) -#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) - -#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) -#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 -#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 -#define _FP_NANSIGN_S 0 -#define _FP_NANSIGN_D 0 -#define _FP_NANSIGN_Q 0 - -#define _FP_KEEPNANFRACP 1 - -/* - * If one NaN is signaling and the other is not, - * we choose that one, otherwise we choose X. - */ -#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ - do { \ - if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ - && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ - { \ - R##_s = Y##_s; \ - _FP_FRAC_COPY_##wc(R,Y); \ - } \ - else \ - { \ - R##_s = X##_s; \ - _FP_FRAC_COPY_##wc(R,X); \ - } \ - R##_c = FP_CLS_NAN; \ - } while (0) - -/* Some assembly to speed things up. */ -#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \ - unsigned int __r2 = (x2) + (y2); \ - unsigned int __r1 = (x1); \ - unsigned int __r0 = (x0); \ - asm volatile( \ - " alr %2,%3\n" \ - " brc 12,0f\n" \ - " lhi 0,1\n" \ - " alr %1,0\n" \ - " brc 12,0f\n" \ - " alr %0,0\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \ - : "d" (y0), "i" (1) : "cc", "0" ); \ - asm volatile( \ - " alr %1,%2\n" \ - " brc 12,0f\n" \ - " ahi %0,1\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1) \ - : "d" (y1) : "cc"); \ - (r2) = __r2; \ - (r1) = __r1; \ - (r0) = __r0; \ -}) - -#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \ - unsigned int __r2 = (x2) - (y2); \ - unsigned int __r1 = (x1); \ - unsigned int __r0 = (x0); \ - asm volatile( \ - " slr %2,%3\n" \ - " brc 3,0f\n" \ - " lhi 0,1\n" \ - " slr %1,0\n" \ - " brc 3,0f\n" \ - " slr %0,0\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \ - : "d" (y0) : "cc", "0"); \ - asm volatile( \ - " slr %1,%2\n" \ - " brc 3,0f\n" \ - " ahi %0,-1\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1) \ - : "d" (y1) : "cc"); \ - (r2) = __r2; \ - (r1) = __r1; \ - (r0) = __r0; \ -}) - -#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0) - -/* Obtain the current rounding mode. */ -#define FP_ROUNDMODE mode - -/* Exception flags. */ -#define FP_EX_INVALID 0x800000 -#define FP_EX_DIVZERO 0x400000 -#define FP_EX_OVERFLOW 0x200000 -#define FP_EX_UNDERFLOW 0x100000 -#define FP_EX_INEXACT 0x080000 - -/* We write the results always */ -#define FP_INHIBIT_RESULTS 0 - -#endif diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h deleted file mode 100644 index c8b7cf9d6279..000000000000 --- a/arch/s390/include/asm/sfp-util.h +++ /dev/null @@ -1,67 +0,0 @@ -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/types.h> -#include <asm/byteorder.h> - -#define add_ssaaaa(sh, sl, ah, al, bh, bl) ({ \ - unsigned int __sh = (ah); \ - unsigned int __sl = (al); \ - asm volatile( \ - " alr %1,%3\n" \ - " brc 12,0f\n" \ - " ahi %0,1\n" \ - "0: alr %0,%2" \ - : "+&d" (__sh), "+d" (__sl) \ - : "d" (bh), "d" (bl) : "cc"); \ - (sh) = __sh; \ - (sl) = __sl; \ -}) - -#define sub_ddmmss(sh, sl, ah, al, bh, bl) ({ \ - unsigned int __sh = (ah); \ - unsigned int __sl = (al); \ - asm volatile( \ - " slr %1,%3\n" \ - " brc 3,0f\n" \ - " ahi %0,-1\n" \ - "0: slr %0,%2" \ - : "+&d" (__sh), "+d" (__sl) \ - : "d" (bh), "d" (bl) : "cc"); \ - (sh) = __sh; \ - (sl) = __sl; \ -}) - -/* a umul b = a mul b + (a>=2<<31) ? b<<32:0 + (b>=2<<31) ? a<<32:0 */ -#define umul_ppmm(wh, wl, u, v) ({ \ - unsigned int __wh = u; \ - unsigned int __wl = v; \ - asm volatile( \ - " ltr 1,%0\n" \ - " mr 0,%1\n" \ - " jnm 0f\n" \ - " alr 0,%1\n" \ - "0: ltr %1,%1\n" \ - " jnm 1f\n" \ - " alr 0,%0\n" \ - "1: lr %0,0\n" \ - " lr %1,1\n" \ - : "+d" (__wh), "+d" (__wl) \ - : : "0", "1", "cc"); \ - wh = __wh; \ - wl = __wl; \ -}) - -#define udiv_qrnnd(q, r, n1, n0, d) \ - do { unsigned long __n; \ - unsigned int __r, __d; \ - __n = ((unsigned long)(n1) << 32) + n0; \ - __d = (d); \ - (q) = __n / __d; \ - (r) = __n % __d; \ - } while (0) - -#define UDIV_NEEDS_NORMALIZATION 0 - -#define abort() BUG() - -#define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 1c8f33fca356..72df5f2de6b0 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -37,8 +37,8 @@ #ifndef __ASSEMBLY__ -static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, - u32 *status) +static inline int ____pcpu_sigp(u16 addr, u8 order, unsigned long parm, + u32 *status) { register unsigned long reg1 asm ("1") = parm; int cc; @@ -48,8 +48,19 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, " ipm %0\n" " srl %0,28\n" : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); + *status = reg1; + return cc; +} + +static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, + u32 *status) +{ + u32 _status; + int cc; + + cc = ____pcpu_sigp(addr, order, parm, &_status); if (status && cc == 1) - *status = reg1; + *status = _status; return cc; } diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h new file mode 100644 index 000000000000..7689727585b2 --- /dev/null +++ b/arch/s390/include/asm/stp.h @@ -0,0 +1,51 @@ +/* + * Copyright IBM Corp. 2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ +#ifndef __S390_STP_H +#define __S390_STP_H + +/* notifier for syncs */ +extern struct atomic_notifier_head s390_epoch_delta_notifier; + +/* STP interruption parameter */ +struct stp_irq_parm { + unsigned int _pad0 : 14; + unsigned int tsc : 1; /* Timing status change */ + unsigned int lac : 1; /* Link availability change */ + unsigned int tcpc : 1; /* Time control parameter change */ + unsigned int _pad2 : 15; +} __attribute__ ((packed)); + +#define STP_OP_SYNC 1 +#define STP_OP_CTRL 3 + +struct stp_sstpi { + unsigned int rsvd0; + unsigned int rsvd1 : 8; + unsigned int stratum : 8; + unsigned int vbits : 16; + unsigned int leaps : 16; + unsigned int tmd : 4; + unsigned int ctn : 4; + unsigned int rsvd2 : 3; + unsigned int c : 1; + unsigned int tst : 4; + unsigned int tzo : 16; + unsigned int dsto : 16; + unsigned int ctrl : 16; + unsigned int rsvd3 : 16; + unsigned int tto; + unsigned int rsvd4; + unsigned int ctnid[3]; + unsigned int rsvd5; + unsigned int todoff[4]; + unsigned int rsvd6[48]; +} __attribute__ ((packed)); + +/* Functions needed by the machine check handler */ +int stp_sync_check(void); +int stp_island_check(void); +void stp_queue_work(void); + +#endif /* __S390_STP_H */ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index dcb6312a0b91..0bb08f341c09 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -52,6 +52,70 @@ static inline void store_clock_comparator(__u64 *time) void clock_comparator_work(void); +void __init ptff_init(void); + +extern unsigned char ptff_function_mask[16]; +extern unsigned long lpar_offset; +extern unsigned long initial_leap_seconds; + +/* Function codes for the ptff instruction. */ +#define PTFF_QAF 0x00 /* query available functions */ +#define PTFF_QTO 0x01 /* query tod offset */ +#define PTFF_QSI 0x02 /* query steering information */ +#define PTFF_QUI 0x04 /* query UTC information */ +#define PTFF_ATO 0x40 /* adjust tod offset */ +#define PTFF_STO 0x41 /* set tod offset */ +#define PTFF_SFS 0x42 /* set fine steering rate */ +#define PTFF_SGS 0x43 /* set gross steering rate */ + +/* Query TOD offset result */ +struct ptff_qto { + unsigned long long physical_clock; + unsigned long long tod_offset; + unsigned long long logical_tod_offset; + unsigned long long tod_epoch_difference; +} __packed; + +static inline int ptff_query(unsigned int nr) +{ + unsigned char *ptr; + + ptr = ptff_function_mask + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; +} + +/* Query UTC information result */ +struct ptff_qui { + unsigned int tm : 2; + unsigned int ts : 2; + unsigned int : 28; + unsigned int pad_0x04; + unsigned long leap_event; + short old_leap; + short new_leap; + unsigned int pad_0x14; + unsigned long prt[5]; + unsigned long cst[3]; + unsigned int skew; + unsigned int pad_0x5c[41]; +} __packed; + +static inline int ptff(void *ptff_block, size_t len, unsigned int func) +{ + typedef struct { char _[len]; } addrtype; + register unsigned int reg0 asm("0") = func; + register unsigned long reg1 asm("1") = (unsigned long) ptff_block; + int rc; + + asm volatile( + " .word 0x0104\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc), "+m" (*(addrtype *) ptff_block) + : "d" (reg0), "d" (reg1) : "cc"); + return rc; +} + static inline unsigned long long local_tick_disable(void) { unsigned long long old; @@ -105,7 +169,7 @@ static inline cycles_t get_cycles(void) return (cycles_t) get_tod_clock() >> 2; } -int get_sync_clock(unsigned long long *clock); +int get_phys_clock(unsigned long long *clock); void init_cpu_timer(void); unsigned long long monotonic_clock(void); diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index a2e6ef32e054..1a691ef740cf 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -5,6 +5,7 @@ #include <linux/sched.h> #include <asm/processor.h> #include <asm/pgalloc.h> +#include <asm/pgtable.h> /* * Flush all TLB entries on the local CPU. @@ -44,17 +45,9 @@ void smp_ptlb_all(void); */ static inline void __tlb_flush_global(void) { - register unsigned long reg2 asm("2"); - register unsigned long reg3 asm("3"); - register unsigned long reg4 asm("4"); - long dummy; - - dummy = 0; - reg2 = reg3 = 0; - reg4 = ((unsigned long) &dummy) + 1; - asm volatile( - " csp %0,%2" - : : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" ); + unsigned int dummy = 0; + + csp(&dummy, 0, 0); } /* @@ -64,7 +57,7 @@ static inline void __tlb_flush_global(void) static inline void __tlb_flush_full(struct mm_struct *mm) { preempt_disable(); - atomic_add(0x10000, &mm->context.attach_count); + atomic_inc(&mm->context.flush_count); if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { /* Local TLB flush */ __tlb_flush_local(); @@ -76,21 +69,19 @@ static inline void __tlb_flush_full(struct mm_struct *mm) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); } - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); preempt_enable(); } /* - * Flush TLB entries for a specific ASCE on all CPUs. + * Flush TLB entries for a specific ASCE on all CPUs. Should never be used + * when more than one asce (e.g. gmap) ran on this mm. */ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) { - int active, count; - preempt_disable(); - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { __tlb_flush_idte_local(asce); } else { @@ -103,7 +94,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); } - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); preempt_enable(); } diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 6b53962e807e..f15f5571ca2b 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -14,10 +14,12 @@ struct cpu_topology_s390 { unsigned short core_id; unsigned short socket_id; unsigned short book_id; + unsigned short drawer_id; unsigned short node_id; cpumask_t thread_mask; cpumask_t core_mask; cpumask_t book_mask; + cpumask_t drawer_mask; }; DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology); @@ -30,6 +32,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology); #define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask) #define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id) #define topology_book_cpumask(cpu) (&per_cpu(cpu_topology, cpu).book_mask) +#define topology_drawer_id(cpu) (per_cpu(cpu_topology, cpu).drawer_id) +#define topology_drawer_cpumask(cpu) (&per_cpu(cpu_topology, cpu).drawer_mask) #define mc_capable() 1 diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index e0900ddf91dd..9b49cf1daa8f 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -151,8 +151,65 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from, __rc; \ }) -#define __put_user_fn(x, ptr, size) __put_get_user_asm(ptr, x, size, 0x810000UL) -#define __get_user_fn(x, ptr, size) __put_get_user_asm(x, ptr, size, 0x81UL) +static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) +{ + unsigned long spec = 0x810000UL; + int rc; + + switch (size) { + case 1: + rc = __put_get_user_asm((unsigned char __user *)ptr, + (unsigned char *)x, + size, spec); + break; + case 2: + rc = __put_get_user_asm((unsigned short __user *)ptr, + (unsigned short *)x, + size, spec); + break; + case 4: + rc = __put_get_user_asm((unsigned int __user *)ptr, + (unsigned int *)x, + size, spec); + break; + case 8: + rc = __put_get_user_asm((unsigned long __user *)ptr, + (unsigned long *)x, + size, spec); + break; + }; + return rc; +} + +static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) +{ + unsigned long spec = 0x81UL; + int rc; + + switch (size) { + case 1: + rc = __put_get_user_asm((unsigned char *)x, + (unsigned char __user *)ptr, + size, spec); + break; + case 2: + rc = __put_get_user_asm((unsigned short *)x, + (unsigned short __user *)ptr, + size, spec); + break; + case 4: + rc = __put_get_user_asm((unsigned int *)x, + (unsigned int __user *)ptr, + size, spec); + break; + case 8: + rc = __put_get_user_asm((unsigned long *)x, + (unsigned long __user *)ptr, + size, spec); + break; + }; + return rc; +} #else /* CONFIG_HAVE_MARCH_Z10_FEATURES */ @@ -191,7 +248,7 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s __put_user_bad(); \ break; \ } \ - __pu_err; \ + __builtin_expect(__pu_err, 0); \ }) #define put_user(x, ptr) \ @@ -240,7 +297,7 @@ int __put_user_bad(void) __attribute__((noreturn)); __get_user_bad(); \ break; \ } \ - __gu_err; \ + __builtin_expect(__gu_err, 0); \ }) #define get_user(x, ptr) \ diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index a150f4fabe43..77630c74f13b 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -359,9 +359,9 @@ typedef struct per_cr_bits bits; } control_regs; /* - * Use these flags instead of setting em_instruction_fetch - * directly they are used so that single stepping can be - * switched on & off while not affecting other tracing + * The single_step and instruction_fetch bits are obsolete, + * the kernel always sets them to zero. To enable single + * stepping use ptrace(PTRACE_SINGLESTEP) instead. */ unsigned single_step : 1; unsigned instruction_fetch : 1; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2f5586ab8a6a..f37be37edd3a 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -2,6 +2,9 @@ # Makefile for the linux kernel. # +KCOV_INSTRUMENT_early.o := n +KCOV_INSTRUMENT_sclp.o := n + ifdef CONFIG_FUNCTION_TRACER # Don't trace early setup code and tracing code CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) @@ -45,7 +48,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o -obj-y += runtime_instr.o cache.o dumpstack.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o obj-y += entry.o reipl.o relocate_kernel.o extra-y += head.o head64.o vmlinux.lds diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 77a84bd78be2..c8a83276a4dc 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -99,12 +99,7 @@ static inline enum cache_type get_cache_type(struct cache_info *ci, int level) static inline unsigned long ecag(int ai, int li, int ti) { - unsigned long cmd, val; - - cmd = ai << 4 | li << 1 | ti; - asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ - : "=d" (val) : "a" (cmd)); - return val; + return __ecag(ECAG_CACHE_ATTRIBUTE, ai << 4 | li << 1 | ti); } static void ci_leaf_init(struct cacheinfo *this_leaf, int private, diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 8cb9bfdd3ea8..43446fa2a4e5 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -26,7 +26,6 @@ #include <asm/dis.h> #include <asm/io.h> #include <linux/atomic.h> -#include <asm/mathemu.h> #include <asm/cpcmd.h> #include <asm/lowcore.h> #include <asm/debug.h> diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 69f9908ac44c..6693383bc01b 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -78,14 +78,10 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, sp = __dump_trace(func, data, sp, S390_lowcore.async_stack + frame_size - ASYNC_SIZE, S390_lowcore.async_stack + frame_size); - if (task) - __dump_trace(func, data, sp, - (unsigned long)task_stack_page(task), - (unsigned long)task_stack_page(task) + THREAD_SIZE); - else - __dump_trace(func, data, sp, - S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); + task = task ?: current; + __dump_trace(func, data, sp, + (unsigned long)task_stack_page(task), + (unsigned long)task_stack_page(task) + THREAD_SIZE); } EXPORT_SYMBOL_GPL(dump_trace); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index a0684de5a93b..717b03aa16b5 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -231,6 +231,26 @@ static noinline __init void detect_machine_type(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } +static noinline __init void setup_arch_string(void) +{ + struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; + + if (stsi(mach, 1, 1, 1)) + return; + EBCASC(mach->manufacturer, sizeof(mach->manufacturer)); + EBCASC(mach->type, sizeof(mach->type)); + EBCASC(mach->model, sizeof(mach->model)); + EBCASC(mach->model_capacity, sizeof(mach->model_capacity)); + dump_stack_set_arch_desc("%-16.16s %-4.4s %-16.16s %-16.16s (%s)", + mach->manufacturer, + mach->type, + mach->model, + mach->model_capacity, + MACHINE_IS_LPAR ? "LPAR" : + MACHINE_IS_VM ? "z/VM" : + MACHINE_IS_KVM ? "KVM" : "unknown"); +} + static __init void setup_topology(void) { int max_mnest; @@ -447,11 +467,13 @@ void __init startup_init(void) ipl_save_parameters(); rescue_initrd(); clear_bss_section(); + ptff_init(); init_kernel_storage_key(); lockdep_off(); setup_lowcore_early(); setup_facility_list(); detect_machine_type(); + setup_arch_string(); ipl_update_parameters(); setup_boot_command_line(); create_kernel_nss(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 2d47f9cfcb36..c51650a1ed16 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -163,6 +163,16 @@ _PIF_WORK = (_PIF_PER_TRAP) .endm .section .kprobes.text, "ax" +.Ldummy: + /* + * This nop exists only in order to avoid that __switch_to starts at + * the beginning of the kprobes text section. In that case we would + * have several symbols at the same address. E.g. objdump would take + * an arbitrary symbol name when disassembling this code. + * With the added nop in between the __switch_to symbol is unique + * again. + */ + nop 0 /* * Scheduler resume function, called by switch_to @@ -175,7 +185,6 @@ ENTRY(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task lgr %r1,%r2 aghi %r1,__TASK_thread # thread_struct of prev task - lg %r4,__TASK_thread_info(%r2) # get thread_info of prev lg %r5,__TASK_thread_info(%r3) # get thread_info of next stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev lgr %r1,%r3 diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c new file mode 100644 index 000000000000..81d1d1887507 --- /dev/null +++ b/arch/s390/kernel/fpu.c @@ -0,0 +1,249 @@ +/* + * In-kernel vector facility support functions + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ +#include <linux/kernel.h> +#include <linux/cpu.h> +#include <linux/sched.h> +#include <asm/fpu/types.h> +#include <asm/fpu/api.h> + +/* + * Per-CPU variable to maintain FPU register ranges that are in use + * by the kernel. + */ +static DEFINE_PER_CPU(u32, kernel_fpu_state); + +#define KERNEL_FPU_STATE_MASK (KERNEL_FPU_MASK|KERNEL_FPC) + + +void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) +{ + if (!__this_cpu_read(kernel_fpu_state)) { + /* + * Save user space FPU state and register contents. Multiple + * calls because of interruptions do not matter and return + * immediately. This also sets CIF_FPU to lazy restore FP/VX + * register contents when returning to user space. + */ + save_fpu_regs(); + } + + /* Update flags to use the vector facility for KERNEL_FPR */ + if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) { + flags |= KERNEL_VXR_LOW | KERNEL_FPC; + flags &= ~KERNEL_FPR; + } + + /* Save and update current kernel VX state */ + state->mask = __this_cpu_read(kernel_fpu_state); + __this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK); + + /* + * If this is the first call to __kernel_fpu_begin(), no additional + * work is required. + */ + if (!(state->mask & KERNEL_FPU_STATE_MASK)) + return; + + /* + * If KERNEL_FPR is still set, the vector facility is not available + * and, thus, save floating-point control and registers only. + */ + if (state->mask & KERNEL_FPR) { + asm volatile("stfpc %0" : "=Q" (state->fpc)); + asm volatile("std 0,%0" : "=Q" (state->fprs[0])); + asm volatile("std 1,%0" : "=Q" (state->fprs[1])); + asm volatile("std 2,%0" : "=Q" (state->fprs[2])); + asm volatile("std 3,%0" : "=Q" (state->fprs[3])); + asm volatile("std 4,%0" : "=Q" (state->fprs[4])); + asm volatile("std 5,%0" : "=Q" (state->fprs[5])); + asm volatile("std 6,%0" : "=Q" (state->fprs[6])); + asm volatile("std 7,%0" : "=Q" (state->fprs[7])); + asm volatile("std 8,%0" : "=Q" (state->fprs[8])); + asm volatile("std 9,%0" : "=Q" (state->fprs[9])); + asm volatile("std 10,%0" : "=Q" (state->fprs[10])); + asm volatile("std 11,%0" : "=Q" (state->fprs[11])); + asm volatile("std 12,%0" : "=Q" (state->fprs[12])); + asm volatile("std 13,%0" : "=Q" (state->fprs[13])); + asm volatile("std 14,%0" : "=Q" (state->fprs[14])); + asm volatile("std 15,%0" : "=Q" (state->fprs[15])); + return; + } + + /* + * If this is a nested call to __kernel_fpu_begin(), check the saved + * state mask to save and later restore the vector registers that + * are already in use. Let's start with checking floating-point + * controls. + */ + if (state->mask & KERNEL_FPC) + asm volatile("stfpc %0" : "=m" (state->fpc)); + + /* Test and save vector registers */ + asm volatile ( + /* + * Test if any vector register must be saved and, if so, + * test if all register can be saved. + */ + " tmll %[m],15\n" /* KERNEL_VXR_MASK */ + " jz 20f\n" /* no work -> done */ + " la 1,%[vxrs]\n" /* load save area */ + " jo 18f\n" /* -> save V0..V31 */ + + /* + * Test if V8..V23 can be saved at once... this speeds up + * for KERNEL_fpu_MID only. Otherwise continue to split the + * range of vector registers into two halves and test them + * separately. + */ + " tmll %[m],6\n" /* KERNEL_VXR_MID */ + " jo 17f\n" /* -> save V8..V23 */ + + /* Test and save the first half of 16 vector registers */ + "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ + " jz 10f\n" /* -> KERNEL_VXR_HIGH */ + " jo 2f\n" /* 11 -> save V0..V15 */ + " brc 4,3f\n" /* 01 -> save V0..V7 */ + " brc 2,4f\n" /* 10 -> save V8..V15 */ + + /* Test and save the second half of 16 vector registers */ + "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ + " jo 19f\n" /* 11 -> save V16..V31 */ + " brc 4,11f\n" /* 01 -> save V16..V23 */ + " brc 2,12f\n" /* 10 -> save V24..V31 */ + " j 20f\n" /* 00 -> done */ + + /* + * Below are the vstm combinations to save multiple vector + * registers at once. + */ + "2: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "3: .word 0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "4: .word 0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "\n" + "11: .word 0xe707,0x1100,0x0c3e\n" /* vstm 16,23,256(1) */ + " j 20f\n" /* -> done */ + "12: .word 0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */ + " j 20f\n" /* -> done */ + "\n" + "17: .word 0xe787,0x1080,0x043e\n" /* vstm 8,23,128(1) */ + " nill %[m],249\n" /* m &= ~VXR_MID */ + " j 1b\n" /* -> VXR_LOW */ + "\n" + "18: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + "19: .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ + "20:" + : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs) + : [m] "d" (state->mask) + : "1", "cc"); +} +EXPORT_SYMBOL(__kernel_fpu_begin); + +void __kernel_fpu_end(struct kernel_fpu *state) +{ + /* Just update the per-CPU state if there is nothing to restore */ + if (!(state->mask & KERNEL_FPU_STATE_MASK)) + goto update_fpu_state; + + /* + * If KERNEL_FPR is specified, the vector facility is not available + * and, thus, restore floating-point control and registers only. + */ + if (state->mask & KERNEL_FPR) { + asm volatile("lfpc %0" : : "Q" (state->fpc)); + asm volatile("ld 0,%0" : : "Q" (state->fprs[0])); + asm volatile("ld 1,%0" : : "Q" (state->fprs[1])); + asm volatile("ld 2,%0" : : "Q" (state->fprs[2])); + asm volatile("ld 3,%0" : : "Q" (state->fprs[3])); + asm volatile("ld 4,%0" : : "Q" (state->fprs[4])); + asm volatile("ld 5,%0" : : "Q" (state->fprs[5])); + asm volatile("ld 6,%0" : : "Q" (state->fprs[6])); + asm volatile("ld 7,%0" : : "Q" (state->fprs[7])); + asm volatile("ld 8,%0" : : "Q" (state->fprs[8])); + asm volatile("ld 9,%0" : : "Q" (state->fprs[9])); + asm volatile("ld 10,%0" : : "Q" (state->fprs[10])); + asm volatile("ld 11,%0" : : "Q" (state->fprs[11])); + asm volatile("ld 12,%0" : : "Q" (state->fprs[12])); + asm volatile("ld 13,%0" : : "Q" (state->fprs[13])); + asm volatile("ld 14,%0" : : "Q" (state->fprs[14])); + asm volatile("ld 15,%0" : : "Q" (state->fprs[15])); + goto update_fpu_state; + } + + /* Test and restore floating-point controls */ + if (state->mask & KERNEL_FPC) + asm volatile("lfpc %0" : : "Q" (state->fpc)); + + /* Test and restore (load) vector registers */ + asm volatile ( + /* + * Test if any vector registers must be loaded and, if so, + * test if all registers can be loaded at once. + */ + " tmll %[m],15\n" /* KERNEL_VXR_MASK */ + " jz 20f\n" /* no work -> done */ + " la 1,%[vxrs]\n" /* load load area */ + " jo 18f\n" /* -> load V0..V31 */ + + /* + * Test if V8..V23 can be restored at once... this speeds up + * for KERNEL_VXR_MID only. Otherwise continue to split the + * range of vector registers into two halves and test them + * separately. + */ + " tmll %[m],6\n" /* KERNEL_VXR_MID */ + " jo 17f\n" /* -> load V8..V23 */ + + /* Test and load the first half of 16 vector registers */ + "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ + " jz 10f\n" /* -> KERNEL_VXR_HIGH */ + " jo 2f\n" /* 11 -> load V0..V15 */ + " brc 4,3f\n" /* 01 -> load V0..V7 */ + " brc 2,4f\n" /* 10 -> load V8..V15 */ + + /* Test and load the second half of 16 vector registers */ + "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ + " jo 19f\n" /* 11 -> load V16..V31 */ + " brc 4,11f\n" /* 01 -> load V16..V23 */ + " brc 2,12f\n" /* 10 -> load V24..V31 */ + " j 20f\n" /* 00 -> done */ + + /* + * Below are the vstm combinations to load multiple vector + * registers at once. + */ + "2: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "3: .word 0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "4: .word 0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "\n" + "11: .word 0xe707,0x1100,0x0c36\n" /* vlm 16,23,256(1) */ + " j 20f\n" /* -> done */ + "12: .word 0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */ + " j 20f\n" /* -> done */ + "\n" + "17: .word 0xe787,0x1080,0x0436\n" /* vlm 8,23,128(1) */ + " nill %[m],249\n" /* m &= ~VXR_MID */ + " j 1b\n" /* -> VXR_LOW */ + "\n" + "18: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + "19: .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ + "20:" + : + : [vxrs] "Q" (*(struct vx_array *) &state->vxrs), + [m] "d" (state->mask) + : "1", "cc"); + +update_fpu_state: + /* Update current kernel VX state */ + __this_cpu_write(kernel_fpu_state, state->mask); +} +EXPORT_SYMBOL(__kernel_fpu_end); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index d14069d4b88d..295bfb7124bc 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -121,9 +121,9 @@ static char *dump_type_str(enum dump_type type) * Must be in data section since the bss section * is not cleared when these are accessed. */ -static u8 ipl_ssid __attribute__((__section__(".data"))) = 0; -static u16 ipl_devno __attribute__((__section__(".data"))) = 0; -u32 ipl_flags __attribute__((__section__(".data"))) = 0; +static u8 ipl_ssid __section(.data) = 0; +static u16 ipl_devno __section(.data) = 0; +u32 ipl_flags __section(.data) = 0; enum ipl_method { REIPL_METHOD_CCW_CIO, @@ -174,7 +174,7 @@ static inline int __diag308(unsigned long subcode, void *addr) asm volatile( " diag %0,%2,0x308\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : "+d" (_addr), "+d" (_rc) : "d" (subcode) : "cc", "memory"); @@ -563,7 +563,7 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); else if (ipl_info.type == IPL_TYPE_CCW) @@ -1085,21 +1085,24 @@ static void __reipl_run(void *unused) break; case REIPL_METHOD_CCW_DIAG: diag308(DIAG308_SET, reipl_block_ccw); - diag308(DIAG308_IPL, NULL); + if (MACHINE_IS_LPAR) + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); + else + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RW_DIAG: diag308(DIAG308_SET, reipl_block_fcp); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RO_DIAG: - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RO_VM: __cpcmd("IPL", NULL, 0, NULL); break; case REIPL_METHOD_NSS_DIAG: diag308(DIAG308_SET, reipl_block_nss); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_NSS: get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS); @@ -1108,7 +1111,7 @@ static void __reipl_run(void *unused) case REIPL_METHOD_DEFAULT: if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_DUMP: break; @@ -1423,7 +1426,7 @@ static void diag308_dump(void *dump_block) { diag308(DIAG308_SET, dump_block); while (1) { - if (diag308(DIAG308_DUMP, NULL) != 0x302) + if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302) break; udelay_simple(USEC_PER_SEC); } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index c373a1d41d10..285d6561076d 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -127,9 +127,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); } - if (index < NR_IRQS) { - if (index >= NR_IRQS_BASE) - goto out; + if (index < NR_IRQS_BASE) { seq_printf(p, "%s: ", irqclass_main_desc[index].name); irq = irqclass_main_desc[index].irq; for_each_online_cpu(cpu) @@ -137,6 +135,9 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); goto out; } + if (index > NR_IRQS_BASE) + goto out; + for (index = 0; index < NR_ARCH_IRQS; index++) { seq_printf(p, "%s: ", irqclass_sub_desc[index].name); irq = irqclass_sub_desc[index].irq; diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 0e64f08d3d69..3074c1d83829 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -24,6 +24,7 @@ #include <asm/diag.h> #include <asm/elf.h> #include <asm/asm-offsets.h> +#include <asm/cacheflush.h> #include <asm/os_info.h> #include <asm/switch_to.h> @@ -60,8 +61,6 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, static int __init machine_kdump_pm_init(void) { pm_notifier(machine_kdump_pm_cb, 0); - /* Create initial mapping for crashkernel memory */ - arch_kexec_unprotect_crashkres(); return 0; } arch_initcall(machine_kdump_pm_init); @@ -150,42 +149,40 @@ static int kdump_csum_valid(struct kimage *image) #ifdef CONFIG_CRASH_DUMP -/* - * Map or unmap crashkernel memory - */ -static void crash_map_pages(int enable) +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) { - unsigned long size = resource_size(&crashk_res); - - BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || - size % KEXEC_CRASH_MEM_ALIGN); - if (enable) - vmem_add_mapping(crashk_res.start, size); - else { - vmem_remove_mapping(crashk_res.start, size); - if (size) - os_info_crashkernel_add(crashk_res.start, size); - else - os_info_crashkernel_add(0, 0); - } + unsigned long addr, size; + + for (addr = begin; addr < end; addr += PAGE_SIZE) + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); + size = begin - crashk_res.start; + if (size) + os_info_crashkernel_add(crashk_res.start, size); + else + os_info_crashkernel_add(0, 0); +} + +static void crash_protect_pages(int protect) +{ + unsigned long size; + + if (!crashk_res.end) + return; + size = resource_size(&crashk_res); + if (protect) + set_memory_ro(crashk_res.start, size >> PAGE_SHIFT); + else + set_memory_rw(crashk_res.start, size >> PAGE_SHIFT); } -/* - * Unmap crashkernel memory - */ void arch_kexec_protect_crashkres(void) { - if (crashk_res.end) - crash_map_pages(0); + crash_protect_pages(1); } -/* - * Map crashkernel memory - */ void arch_kexec_unprotect_crashkres(void) { - if (crashk_res.end) - crash_map_pages(1); + crash_protect_pages(0); } #endif diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 07302ce37648..29376f0e725c 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -16,7 +16,7 @@ #include <linux/module.h> #include <asm/lowcore.h> #include <asm/smp.h> -#include <asm/etr.h> +#include <asm/stp.h> #include <asm/cputime.h> #include <asm/nmi.h> #include <asm/crw.h> @@ -27,7 +27,6 @@ struct mcck_struct { unsigned int kill_task : 1; unsigned int channel_report : 1; unsigned int warning : 1; - unsigned int etr_queue : 1; unsigned int stp_queue : 1; unsigned long mcck_code; }; @@ -82,8 +81,6 @@ void s390_handle_mcck(void) if (xchg(&mchchk_wng_posted, 1) == 0) kill_cad_pid(SIGPWR, 1); } - if (mcck.etr_queue) - etr_queue_work(); if (mcck.stp_queue) stp_queue_work(); if (mcck.kill_task) { @@ -241,8 +238,6 @@ static int notrace s390_validate_registers(union mci mci) #define ED_STP_ISLAND 6 /* External damage STP island check */ #define ED_STP_SYNC 7 /* External damage STP sync check */ -#define ED_ETR_SYNC 12 /* External damage ETR sync check */ -#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */ /* * machine check handler. @@ -325,15 +320,11 @@ void notrace s390_do_machine_check(struct pt_regs *regs) } if (mci.ed && mci.ec) { /* External damage */ - if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC)) - mcck->etr_queue |= etr_sync_check(); - if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH)) - mcck->etr_queue |= etr_switch_to_local(); if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) mcck->stp_queue |= stp_sync_check(); if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) mcck->stp_queue |= stp_island_check(); - if (mcck->etr_queue || mcck->stp_queue) + if (mcck->stp_queue) set_cpu_flag(CIF_MCCK_PENDING); } if (mci.se) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index a8e832166417..9ea26dface38 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -601,17 +601,12 @@ static void release_pmc_hardware(void) irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); on_each_cpu(setup_pmc_cpu, &flags, 1); - perf_release_sampling(); } static int reserve_pmc_hardware(void) { int flags = PMC_INIT; - int err; - err = perf_reserve_sampling(); - if (err) - return err; on_each_cpu(setup_pmc_cpu, &flags, 1); if (flags & PMC_FAILURE) { release_pmc_hardware(); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 87035fa58bbe..17431f63de00 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -248,33 +248,3 @@ ssize_t cpumf_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%04llx,name=%s\n", pmu_attr->id, attr->attr.name); } - -/* Reserve/release functions for sharing perf hardware */ -static DEFINE_SPINLOCK(perf_hw_owner_lock); -static void *perf_sampling_owner; - -int perf_reserve_sampling(void) -{ - int err; - - err = 0; - spin_lock(&perf_hw_owner_lock); - if (perf_sampling_owner) { - pr_warn("The sampling facility is already reserved by %p\n", - perf_sampling_owner); - err = -EBUSY; - } else - perf_sampling_owner = __builtin_return_address(0); - spin_unlock(&perf_hw_owner_lock); - return err; -} -EXPORT_SYMBOL(perf_reserve_sampling); - -void perf_release_sampling(void) -{ - spin_lock(&perf_hw_owner_lock); - WARN_ON(!perf_sampling_owner); - perf_sampling_owner = NULL; - spin_unlock(&perf_hw_owner_lock); -} -EXPORT_SYMBOL(perf_release_sampling); diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index de7451065c34..81d0808085e6 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -13,12 +13,45 @@ #include <linux/delay.h> #include <linux/cpu.h> #include <asm/diag.h> +#include <asm/facility.h> #include <asm/elf.h> #include <asm/lowcore.h> #include <asm/param.h> #include <asm/smp.h> -static DEFINE_PER_CPU(struct cpuid, cpu_id); +struct cpu_info { + unsigned int cpu_mhz_dynamic; + unsigned int cpu_mhz_static; + struct cpuid cpu_id; +}; + +static DEFINE_PER_CPU(struct cpu_info, cpu_info); + +static bool machine_has_cpu_mhz; + +void __init cpu_detect_mhz_feature(void) +{ + if (test_facility(34) && __ecag(ECAG_CPU_ATTRIBUTE, 0) != -1UL) + machine_has_cpu_mhz = 1; +} + +static void update_cpu_mhz(void *arg) +{ + unsigned long mhz; + struct cpu_info *c; + + mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + c = this_cpu_ptr(&cpu_info); + c->cpu_mhz_dynamic = mhz >> 32; + c->cpu_mhz_static = mhz & 0xffffffff; +} + +void s390_update_cpu_mhz(void) +{ + s390_adjust_jiffies(); + if (machine_has_cpu_mhz) + on_each_cpu(update_cpu_mhz, NULL, 0); +} void notrace cpu_relax(void) { @@ -35,9 +68,11 @@ EXPORT_SYMBOL(cpu_relax); */ void cpu_init(void) { - struct cpuid *id = this_cpu_ptr(&cpu_id); + struct cpuid *id = this_cpu_ptr(&cpu_info.cpu_id); get_cpu_id(id); + if (machine_has_cpu_mhz) + update_cpu_mhz(NULL); atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; BUG_ON(current->mm); @@ -53,10 +88,7 @@ int cpu_have_feature(unsigned int num) } EXPORT_SYMBOL(cpu_have_feature); -/* - * show_cpuinfo - Get information on one CPU for use by procfs. - */ -static int show_cpuinfo(struct seq_file *m, void *v) +static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", @@ -65,34 +97,55 @@ static int show_cpuinfo(struct seq_file *m, void *v) static const char * const int_hwcap_str[] = { "sie" }; - unsigned long n = (unsigned long) v - 1; - int i; - - if (!n) { - s390_adjust_jiffies(); - seq_printf(m, "vendor_id : IBM/S390\n" - "# processors : %i\n" - "bogomips per cpu: %lu.%02lu\n", - num_online_cpus(), loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ))%100); - seq_puts(m, "features\t: "); - for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) - if (hwcap_str[i] && (elf_hwcap & (1UL << i))) - seq_printf(m, "%s ", hwcap_str[i]); - for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) - if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) - seq_printf(m, "%s ", int_hwcap_str[i]); - seq_puts(m, "\n"); - show_cacheinfo(m); - } - if (cpu_online(n)) { - struct cpuid *id = &per_cpu(cpu_id, n); - seq_printf(m, "processor %li: " + int i, cpu; + + seq_printf(m, "vendor_id : IBM/S390\n" + "# processors : %i\n" + "bogomips per cpu: %lu.%02lu\n", + num_online_cpus(), loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ))%100); + seq_printf(m, "max thread id : %d\n", smp_cpu_mtid); + seq_puts(m, "features\t: "); + for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) + if (hwcap_str[i] && (elf_hwcap & (1UL << i))) + seq_printf(m, "%s ", hwcap_str[i]); + for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) + if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) + seq_printf(m, "%s ", int_hwcap_str[i]); + seq_puts(m, "\n"); + show_cacheinfo(m); + for_each_online_cpu(cpu) { + struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu); + + seq_printf(m, "processor %d: " "version = %02X, " "identification = %06X, " "machine = %04X\n", - n, id->version, id->ident, id->machine); + cpu, id->version, id->ident, id->machine); } +} + +static void show_cpu_mhz(struct seq_file *m, unsigned long n) +{ + struct cpu_info *c = per_cpu_ptr(&cpu_info, n); + + seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic); + seq_printf(m, "cpu MHz static : %d\n", c->cpu_mhz_static); +} + +/* + * show_cpuinfo - Get information on one CPU for use by procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + unsigned long n = (unsigned long) v - 1; + + if (!n) + show_cpu_summary(m, v); + if (!machine_has_cpu_mhz) + return 0; + seq_printf(m, "\ncpu number : %ld\n", n); + show_cpu_mhz(m, n); return 0; } @@ -126,4 +179,3 @@ const struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; - diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f31939147ccd..ba5f456edaa9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -130,17 +130,14 @@ __setup("condev=", condev_setup); static void __init set_preferred_console(void) { - if (MACHINE_IS_KVM) { - if (sclp.has_vt220) - add_preferred_console("ttyS", 1, NULL); - else if (sclp.has_linemode) - add_preferred_console("ttyS", 0, NULL); - else - add_preferred_console("hvc", 0, NULL); - } else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) add_preferred_console("ttyS", 0, NULL); else if (CONSOLE_IS_3270) add_preferred_console("tty3270", 0, NULL); + else if (CONSOLE_IS_VT220) + add_preferred_console("ttyS", 1, NULL); + else if (CONSOLE_IS_HVC) + add_preferred_console("hvc", 0, NULL); } static int __init conmode_setup(char *str) @@ -206,6 +203,15 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } + } else if (MACHINE_IS_KVM) { + if (sclp.has_vt220 && + config_enabled(CONFIG_SCLP_VT220_CONSOLE)) + SET_CONSOLE_VT220; + else if (sclp.has_linemode && + config_enabled(CONFIG_SCLP_CONSOLE)) + SET_CONSOLE_SCLP; + else + SET_CONSOLE_HVC; } else { #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) SET_CONSOLE_SCLP; @@ -289,7 +295,7 @@ static int __init parse_vmalloc(char *arg) } early_param("vmalloc", parse_vmalloc); -void *restart_stack __attribute__((__section__(".data"))); +void *restart_stack __section(.data); static void __init setup_lowcore(void) { @@ -432,6 +438,20 @@ static void __init setup_resources(void) } } } +#ifdef CONFIG_CRASH_DUMP + /* + * Re-add removed crash kernel memory as reserved memory. This makes + * sure it will be mapped with the identity mapping and struct pages + * will be created, so it can be resized later on. + * However add it later since the crash kernel resource should not be + * part of the System RAM resource. + */ + if (crashk_res.end) { + memblock_add(crashk_res.start, resource_size(&crashk_res)); + memblock_reserve(crashk_res.start, resource_size(&crashk_res)); + insert_resource(&iomem_resource, &crashk_res); + } +#endif } static void __init setup_memory_end(void) @@ -602,7 +622,6 @@ static void __init reserve_crashkernel(void) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; - insert_resource(&iomem_resource, &crashk_res); memblock_remove(crash_base, crash_size); pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", @@ -901,6 +920,7 @@ void __init setup_arch(char **cmdline_p) setup_vmcoreinfo(); setup_lowcore(); smp_fill_possible_mask(); + cpu_detect_mhz_feature(); cpu_init(); numa_setup(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 7b89a7572100..35531fe1c5ea 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -242,10 +242,8 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { struct lowcore *lc = pcpu->lowcore; - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); - atomic_inc(&init_mm.context.attach_count); lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->percpu_offset = __per_cpu_offset[cpu]; @@ -320,17 +318,11 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), */ static int pcpu_set_smt(unsigned int mtid) { - register unsigned long reg1 asm ("1") = (unsigned long) mtid; int cc; if (smp_cpu_mtid == mtid) return 0; - asm volatile( - " sigp %1,0,%2 # sigp set multi-threading\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING) - : "cc"); + cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL); if (cc == 0) { smp_cpu_mtid = mtid; smp_cpu_mt_shift = 0; @@ -876,10 +868,8 @@ void __cpu_die(unsigned int cpu) while (!pcpu_stopped(pcpu)) cpu_relax(); pcpu_free_lowcore(pcpu); - atomic_dec(&init_mm.context.attach_count); cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); - if (MACHINE_HAS_TLB_LC) - cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); } void __noreturn cpu_die(void) @@ -897,7 +887,7 @@ void __init smp_fill_possible_mask(void) sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1; sclp_max = min(smp_max_threads, sclp_max); - sclp_max = sclp.max_cores * sclp_max ?: nr_cpu_ids; + sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids; possible = setup_possible_cpus ?: nr_cpu_ids; possible = min(possible, sclp_max); for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index f7dba3887a54..050b8d067d3b 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -16,21 +16,11 @@ #include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/topology.h> - -/* Sigh, math-emu. Don't ask. */ -#include <asm/sfp-util.h> -#include <math-emu/soft-fp.h> -#include <math-emu/single.h> +#include <asm/fpu/api.h> int topology_max_mnest; -/* - * stsi - store system information - * - * Returns the current configuration level if function code 0 was specified. - * Otherwise returns 0 on success or a negative value on error. - */ -int stsi(void *sysinfo, int fc, int sel1, int sel2) +static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl) { register int r0 asm("0") = (fc << 28) | sel1; register int r1 asm("1") = sel2; @@ -45,9 +35,24 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2) : "+d" (r0), "+d" (rc) : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP) : "cc", "memory"); + *lvl = ((unsigned int) r0) >> 28; + return rc; +} + +/* + * stsi - store system information + * + * Returns the current configuration level if function code 0 was specified. + * Otherwise returns 0 on success or a negative value on error. + */ +int stsi(void *sysinfo, int fc, int sel1, int sel2) +{ + int lvl, rc; + + rc = __stsi(sysinfo, fc, sel1, sel2, &lvl); if (rc) return rc; - return fc ? 0 : ((unsigned int) r0) >> 28; + return fc ? 0 : lvl; } EXPORT_SYMBOL(stsi); @@ -414,10 +419,8 @@ subsys_initcall(create_proc_service_level); void s390_adjust_jiffies(void) { struct sysinfo_1_2_2 *info; - const unsigned int fmil = 0x4b189680; /* 1e7 as 32-bit float. */ - FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); - FP_DECL_EX; - unsigned int capability; + unsigned long capability; + struct kernel_fpu fpu; info = (void *) get_zeroed_page(GFP_KERNEL); if (!info) @@ -433,15 +436,25 @@ void s390_adjust_jiffies(void) * higher cpu capacity. Bogomips are the other way round. * To get to a halfway suitable number we divide 1e7 * by the cpu capability number. Yes, that means a floating - * point division .. math-emu here we come :-) + * point division .. */ - FP_UNPACK_SP(SA, &fmil); - if ((info->capability >> 23) == 0) - FP_FROM_INT_S(SB, (long) info->capability, 64, long); - else - FP_UNPACK_SP(SB, &info->capability); - FP_DIV_S(SR, SA, SB); - FP_TO_INT_S(capability, SR, 32, 0); + kernel_fpu_begin(&fpu, KERNEL_FPR); + asm volatile( + " sfpc %3\n" + " l %0,%1\n" + " tmlh %0,0xff80\n" + " jnz 0f\n" + " cefbr %%f2,%0\n" + " j 1f\n" + "0: le %%f2,%1\n" + "1: cefbr %%f0,%2\n" + " debr %%f0,%%f2\n" + " cgebr %0,5,%%f0\n" + : "=&d" (capability) + : "Q" (info->capability), "d" (10000000), "d" (0) + : "cc" + ); + kernel_fpu_end(&fpu); } else /* * Really old machine without stsi block for basic diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 9409d32f285e..4e9949800562 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -39,13 +39,14 @@ #include <linux/gfp.h> #include <linux/kprobes.h> #include <asm/uaccess.h> +#include <asm/facility.h> #include <asm/delay.h> #include <asm/div64.h> #include <asm/vdso.h> #include <asm/irq.h> #include <asm/irq_regs.h> #include <asm/vtimer.h> -#include <asm/etr.h> +#include <asm/stp.h> #include <asm/cio.h> #include "entry.h" @@ -61,6 +62,32 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators); ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier); EXPORT_SYMBOL(s390_epoch_delta_notifier); +unsigned char ptff_function_mask[16]; +unsigned long lpar_offset; +unsigned long initial_leap_seconds; + +/* + * Get time offsets with PTFF + */ +void __init ptff_init(void) +{ + struct ptff_qto qto; + struct ptff_qui qui; + + if (!test_facility(28)) + return; + ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF); + + /* get LPAR offset */ + if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) + lpar_offset = qto.tod_epoch_difference; + + /* get initial leap seconds */ + if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0) + initial_leap_seconds = (unsigned long) + ((long) qui.old_leap * 4096000000L); +} + /* * Scheduler clock - returns current time in nanosec units. */ @@ -162,30 +189,32 @@ static void clock_comparator_interrupt(struct ext_code ext_code, set_clock_comparator(S390_lowcore.clock_comparator); } -static void etr_timing_alert(struct etr_irq_parm *); static void stp_timing_alert(struct stp_irq_parm *); static void timing_alert_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { inc_irq_stat(IRQEXT_TLA); - if (param32 & 0x00c40000) - etr_timing_alert((struct etr_irq_parm *) ¶m32); if (param32 & 0x00038000) stp_timing_alert((struct stp_irq_parm *) ¶m32); } -static void etr_reset(void); static void stp_reset(void); void read_persistent_clock64(struct timespec64 *ts) { - tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts); + __u64 clock; + + clock = get_tod_clock() - initial_leap_seconds; + tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); } void read_boot_clock64(struct timespec64 *ts) { - tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); + __u64 clock; + + clock = sched_clock_base_cc - initial_leap_seconds; + tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); } static cycle_t read_tod_clock(struct clocksource *cs) @@ -269,7 +298,6 @@ void update_vsyscall_tz(void) void __init time_init(void) { /* Reset time synchronization interfaces. */ - etr_reset(); stp_reset(); /* request the clock comparator external interrupt */ @@ -337,20 +365,20 @@ static unsigned long clock_sync_flags; #define CLOCK_SYNC_STP 3 /* - * The synchronous get_clock function. It will write the current clock - * value to the clock pointer and return 0 if the clock is in sync with - * the external time source. If the clock mode is local it will return - * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external - * reference. + * The get_clock function for the physical clock. It will get the current + * TOD clock, subtract the LPAR offset and write the result to *clock. + * The function returns 0 if the clock is in sync with the external time + * source. If the clock mode is local it will return -EOPNOTSUPP and + * -EAGAIN if the clock is not in sync with the external reference. */ -int get_sync_clock(unsigned long long *clock) +int get_phys_clock(unsigned long long *clock) { atomic_t *sw_ptr; unsigned int sw0, sw1; sw_ptr = &get_cpu_var(clock_sync_word); sw0 = atomic_read(sw_ptr); - *clock = get_tod_clock(); + *clock = get_tod_clock() - lpar_offset; sw1 = atomic_read(sw_ptr); put_cpu_var(clock_sync_word); if (sw0 == sw1 && (sw0 & 0x80000000U)) @@ -364,7 +392,7 @@ int get_sync_clock(unsigned long long *clock) return -EACCES; return -EAGAIN; } -EXPORT_SYMBOL(get_sync_clock); +EXPORT_SYMBOL(get_phys_clock); /* * Make get_sync_clock return -EAGAIN. @@ -416,301 +444,6 @@ static void __init time_init_wq(void) time_sync_wq = create_singlethread_workqueue("timesync"); } -/* - * External Time Reference (ETR) code. - */ -static int etr_port0_online; -static int etr_port1_online; -static int etr_steai_available; - -static int __init early_parse_etr(char *p) -{ - if (strncmp(p, "off", 3) == 0) - etr_port0_online = etr_port1_online = 0; - else if (strncmp(p, "port0", 5) == 0) - etr_port0_online = 1; - else if (strncmp(p, "port1", 5) == 0) - etr_port1_online = 1; - else if (strncmp(p, "on", 2) == 0) - etr_port0_online = etr_port1_online = 1; - return 0; -} -early_param("etr", early_parse_etr); - -enum etr_event { - ETR_EVENT_PORT0_CHANGE, - ETR_EVENT_PORT1_CHANGE, - ETR_EVENT_PORT_ALERT, - ETR_EVENT_SYNC_CHECK, - ETR_EVENT_SWITCH_LOCAL, - ETR_EVENT_UPDATE, -}; - -/* - * Valid bit combinations of the eacr register are (x = don't care): - * e0 e1 dp p0 p1 ea es sl - * 0 0 x 0 0 0 0 0 initial, disabled state - * 0 0 x 0 1 1 0 0 port 1 online - * 0 0 x 1 0 1 0 0 port 0 online - * 0 0 x 1 1 1 0 0 both ports online - * 0 1 x 0 1 1 0 0 port 1 online and usable, ETR or PPS mode - * 0 1 x 0 1 1 0 1 port 1 online, usable and ETR mode - * 0 1 x 0 1 1 1 0 port 1 online, usable, PPS mode, in-sync - * 0 1 x 0 1 1 1 1 port 1 online, usable, ETR mode, in-sync - * 0 1 x 1 1 1 0 0 both ports online, port 1 usable - * 0 1 x 1 1 1 1 0 both ports online, port 1 usable, PPS mode, in-sync - * 0 1 x 1 1 1 1 1 both ports online, port 1 usable, ETR mode, in-sync - * 1 0 x 1 0 1 0 0 port 0 online and usable, ETR or PPS mode - * 1 0 x 1 0 1 0 1 port 0 online, usable and ETR mode - * 1 0 x 1 0 1 1 0 port 0 online, usable, PPS mode, in-sync - * 1 0 x 1 0 1 1 1 port 0 online, usable, ETR mode, in-sync - * 1 0 x 1 1 1 0 0 both ports online, port 0 usable - * 1 0 x 1 1 1 1 0 both ports online, port 0 usable, PPS mode, in-sync - * 1 0 x 1 1 1 1 1 both ports online, port 0 usable, ETR mode, in-sync - * 1 1 x 1 1 1 1 0 both ports online & usable, ETR, in-sync - * 1 1 x 1 1 1 1 1 both ports online & usable, ETR, in-sync - */ -static struct etr_eacr etr_eacr; -static u64 etr_tolec; /* time of last eacr update */ -static struct etr_aib etr_port0; -static int etr_port0_uptodate; -static struct etr_aib etr_port1; -static int etr_port1_uptodate; -static unsigned long etr_events; -static struct timer_list etr_timer; - -static void etr_timeout(unsigned long dummy); -static void etr_work_fn(struct work_struct *work); -static DEFINE_MUTEX(etr_work_mutex); -static DECLARE_WORK(etr_work, etr_work_fn); - -/* - * Reset ETR attachment. - */ -static void etr_reset(void) -{ - etr_eacr = (struct etr_eacr) { - .e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0, - .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0, - .es = 0, .sl = 0 }; - if (etr_setr(&etr_eacr) == 0) { - etr_tolec = get_tod_clock(); - set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags); - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - } else if (etr_port0_online || etr_port1_online) { - pr_warn("The real or virtual hardware system does not provide an ETR interface\n"); - etr_port0_online = etr_port1_online = 0; - } -} - -static int __init etr_init(void) -{ - struct etr_aib aib; - - if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) - return 0; - time_init_wq(); - /* Check if this machine has the steai instruction. */ - if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0) - etr_steai_available = 1; - setup_timer(&etr_timer, etr_timeout, 0UL); - if (etr_port0_online) { - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } - if (etr_port1_online) { - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } - return 0; -} - -arch_initcall(etr_init); - -/* - * Two sorts of ETR machine checks. The architecture reads: - * "When a machine-check niterruption occurs and if a switch-to-local or - * ETR-sync-check interrupt request is pending but disabled, this pending - * disabled interruption request is indicated and is cleared". - * Which means that we can get etr_switch_to_local events from the machine - * check handler although the interruption condition is disabled. Lovely.. - */ - -/* - * Switch to local machine check. This is called when the last usable - * ETR port goes inactive. After switch to local the clock is not in sync. - */ -int etr_switch_to_local(void) -{ - if (!etr_eacr.sl) - return 0; - disable_sync_clock(NULL); - if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) { - etr_eacr.es = etr_eacr.sl = 0; - etr_setr(&etr_eacr); - return 1; - } - return 0; -} - -/* - * ETR sync check machine check. This is called when the ETR OTE and the - * local clock OTE are farther apart than the ETR sync check tolerance. - * After a ETR sync check the clock is not in sync. The machine check - * is broadcasted to all cpus at the same time. - */ -int etr_sync_check(void) -{ - if (!etr_eacr.es) - return 0; - disable_sync_clock(NULL); - if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) { - etr_eacr.es = 0; - etr_setr(&etr_eacr); - return 1; - } - return 0; -} - -void etr_queue_work(void) -{ - queue_work(time_sync_wq, &etr_work); -} - -/* - * ETR timing alert. There are two causes: - * 1) port state change, check the usability of the port - * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the - * sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3) - * or ETR-data word 4 (edf4) has changed. - */ -static void etr_timing_alert(struct etr_irq_parm *intparm) -{ - if (intparm->pc0) - /* ETR port 0 state change. */ - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - if (intparm->pc1) - /* ETR port 1 state change. */ - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - if (intparm->eai) - /* - * ETR port alert on either port 0, 1 or both. - * Both ports are not up-to-date now. - */ - set_bit(ETR_EVENT_PORT_ALERT, &etr_events); - queue_work(time_sync_wq, &etr_work); -} - -static void etr_timeout(unsigned long dummy) -{ - set_bit(ETR_EVENT_UPDATE, &etr_events); - queue_work(time_sync_wq, &etr_work); -} - -/* - * Check if the etr mode is pss. - */ -static inline int etr_mode_is_pps(struct etr_eacr eacr) -{ - return eacr.es && !eacr.sl; -} - -/* - * Check if the etr mode is etr. - */ -static inline int etr_mode_is_etr(struct etr_eacr eacr) -{ - return eacr.es && eacr.sl; -} - -/* - * Check if the port can be used for TOD synchronization. - * For PPS mode the port has to receive OTEs. For ETR mode - * the port has to receive OTEs, the ETR stepping bit has to - * be zero and the validity bits for data frame 1, 2, and 3 - * have to be 1. - */ -static int etr_port_valid(struct etr_aib *aib, int port) -{ - unsigned int psc; - - /* Check that this port is receiving OTEs. */ - if (aib->tsp == 0) - return 0; - - psc = port ? aib->esw.psc1 : aib->esw.psc0; - if (psc == etr_lpsc_pps_mode) - return 1; - if (psc == etr_lpsc_operational_step) - return !aib->esw.y && aib->slsw.v1 && - aib->slsw.v2 && aib->slsw.v3; - return 0; -} - -/* - * Check if two ports are on the same network. - */ -static int etr_compare_network(struct etr_aib *aib1, struct etr_aib *aib2) -{ - // FIXME: any other fields we have to compare? - return aib1->edf1.net_id == aib2->edf1.net_id; -} - -/* - * Wrapper for etr_stei that converts physical port states - * to logical port states to be consistent with the output - * of stetr (see etr_psc vs. etr_lpsc). - */ -static void etr_steai_cv(struct etr_aib *aib, unsigned int func) -{ - BUG_ON(etr_steai(aib, func) != 0); - /* Convert port state to logical port state. */ - if (aib->esw.psc0 == 1) - aib->esw.psc0 = 2; - else if (aib->esw.psc0 == 0 && aib->esw.p == 0) - aib->esw.psc0 = 1; - if (aib->esw.psc1 == 1) - aib->esw.psc1 = 2; - else if (aib->esw.psc1 == 0 && aib->esw.p == 1) - aib->esw.psc1 = 1; -} - -/* - * Check if the aib a2 is still connected to the same attachment as - * aib a1, the etv values differ by one and a2 is valid. - */ -static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p) -{ - int state_a1, state_a2; - - /* Paranoia check: e0/e1 should better be the same. */ - if (a1->esw.eacr.e0 != a2->esw.eacr.e0 || - a1->esw.eacr.e1 != a2->esw.eacr.e1) - return 0; - - /* Still connected to the same etr ? */ - state_a1 = p ? a1->esw.psc1 : a1->esw.psc0; - state_a2 = p ? a2->esw.psc1 : a2->esw.psc0; - if (state_a1 == etr_lpsc_operational_step) { - if (state_a2 != etr_lpsc_operational_step || - a1->edf1.net_id != a2->edf1.net_id || - a1->edf1.etr_id != a2->edf1.etr_id || - a1->edf1.etr_pn != a2->edf1.etr_pn) - return 0; - } else if (state_a2 != etr_lpsc_pps_mode) - return 0; - - /* The ETV value of a2 needs to be ETV of a1 + 1. */ - if (a1->edf2.etv + 1 != a2->edf2.etv) - return 0; - - if (!etr_port_valid(a2, p)) - return 0; - - return 1; -} - struct clock_sync_data { atomic_t cpus; int in_sync; @@ -748,688 +481,6 @@ static void clock_sync_cpu(struct clock_sync_data *sync) } /* - * Sync the TOD clock using the port referred to by aibp. This port - * has to be enabled and the other port has to be disabled. The - * last eacr update has to be more than 1.6 seconds in the past. - */ -static int etr_sync_clock(void *data) -{ - static int first; - unsigned long long clock, old_clock, clock_delta, delay, delta; - struct clock_sync_data *etr_sync; - struct etr_aib *sync_port, *aib; - int port; - int rc; - - etr_sync = data; - - if (xchg(&first, 1) == 1) { - /* Slave */ - clock_sync_cpu(etr_sync); - return 0; - } - - /* Wait until all other cpus entered the sync function. */ - while (atomic_read(&etr_sync->cpus) != 0) - cpu_relax(); - - port = etr_sync->etr_port; - aib = etr_sync->etr_aib; - sync_port = (port == 0) ? &etr_port0 : &etr_port1; - enable_sync_clock(); - - /* Set clock to next OTE. */ - __ctl_set_bit(14, 21); - __ctl_set_bit(0, 29); - clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32; - old_clock = get_tod_clock(); - if (set_tod_clock(clock) == 0) { - __udelay(1); /* Wait for the clock to start. */ - __ctl_clear_bit(0, 29); - __ctl_clear_bit(14, 21); - etr_stetr(aib); - /* Adjust Linux timing variables. */ - delay = (unsigned long long) - (aib->edf2.etv - sync_port->edf2.etv) << 32; - delta = adjust_time(old_clock, clock, delay); - clock_delta = clock - old_clock; - atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, - &clock_delta); - etr_sync->fixup_cc = delta; - fixup_clock_comparator(delta); - /* Verify that the clock is properly set. */ - if (!etr_aib_follows(sync_port, aib, port)) { - /* Didn't work. */ - disable_sync_clock(NULL); - etr_sync->in_sync = -EAGAIN; - rc = -EAGAIN; - } else { - etr_sync->in_sync = 1; - rc = 0; - } - } else { - /* Could not set the clock ?!? */ - __ctl_clear_bit(0, 29); - __ctl_clear_bit(14, 21); - disable_sync_clock(NULL); - etr_sync->in_sync = -EAGAIN; - rc = -EAGAIN; - } - xchg(&first, 0); - return rc; -} - -static int etr_sync_clock_stop(struct etr_aib *aib, int port) -{ - struct clock_sync_data etr_sync; - struct etr_aib *sync_port; - int follows; - int rc; - - /* Check if the current aib is adjacent to the sync port aib. */ - sync_port = (port == 0) ? &etr_port0 : &etr_port1; - follows = etr_aib_follows(sync_port, aib, port); - memcpy(sync_port, aib, sizeof(*aib)); - if (!follows) - return -EAGAIN; - memset(&etr_sync, 0, sizeof(etr_sync)); - etr_sync.etr_aib = aib; - etr_sync.etr_port = port; - get_online_cpus(); - atomic_set(&etr_sync.cpus, num_online_cpus() - 1); - rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask); - put_online_cpus(); - return rc; -} - -/* - * Handle the immediate effects of the different events. - * The port change event is used for online/offline changes. - */ -static struct etr_eacr etr_handle_events(struct etr_eacr eacr) -{ - if (test_and_clear_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) - eacr.es = 0; - if (test_and_clear_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) - eacr.es = eacr.sl = 0; - if (test_and_clear_bit(ETR_EVENT_PORT_ALERT, &etr_events)) - etr_port0_uptodate = etr_port1_uptodate = 0; - - if (test_and_clear_bit(ETR_EVENT_PORT0_CHANGE, &etr_events)) { - if (eacr.e0) - /* - * Port change of an enabled port. We have to - * assume that this can have caused an stepping - * port switch. - */ - etr_tolec = get_tod_clock(); - eacr.p0 = etr_port0_online; - if (!eacr.p0) - eacr.e0 = 0; - etr_port0_uptodate = 0; - } - if (test_and_clear_bit(ETR_EVENT_PORT1_CHANGE, &etr_events)) { - if (eacr.e1) - /* - * Port change of an enabled port. We have to - * assume that this can have caused an stepping - * port switch. - */ - etr_tolec = get_tod_clock(); - eacr.p1 = etr_port1_online; - if (!eacr.p1) - eacr.e1 = 0; - etr_port1_uptodate = 0; - } - clear_bit(ETR_EVENT_UPDATE, &etr_events); - return eacr; -} - -/* - * Set up a timer that expires after the etr_tolec + 1.6 seconds if - * one of the ports needs an update. - */ -static void etr_set_tolec_timeout(unsigned long long now) -{ - unsigned long micros; - - if ((!etr_eacr.p0 || etr_port0_uptodate) && - (!etr_eacr.p1 || etr_port1_uptodate)) - return; - micros = (now > etr_tolec) ? ((now - etr_tolec) >> 12) : 0; - micros = (micros > 1600000) ? 0 : 1600000 - micros; - mod_timer(&etr_timer, jiffies + (micros * HZ) / 1000000 + 1); -} - -/* - * Set up a time that expires after 1/2 second. - */ -static void etr_set_sync_timeout(void) -{ - mod_timer(&etr_timer, jiffies + HZ/2); -} - -/* - * Update the aib information for one or both ports. - */ -static struct etr_eacr etr_handle_update(struct etr_aib *aib, - struct etr_eacr eacr) -{ - /* With both ports disabled the aib information is useless. */ - if (!eacr.e0 && !eacr.e1) - return eacr; - - /* Update port0 or port1 with aib stored in etr_work_fn. */ - if (aib->esw.q == 0) { - /* Information for port 0 stored. */ - if (eacr.p0 && !etr_port0_uptodate) { - etr_port0 = *aib; - if (etr_port0_online) - etr_port0_uptodate = 1; - } - } else { - /* Information for port 1 stored. */ - if (eacr.p1 && !etr_port1_uptodate) { - etr_port1 = *aib; - if (etr_port0_online) - etr_port1_uptodate = 1; - } - } - - /* - * Do not try to get the alternate port aib if the clock - * is not in sync yet. - */ - if (!eacr.es || !check_sync_clock()) - return eacr; - - /* - * If steai is available we can get the information about - * the other port immediately. If only stetr is available the - * data-port bit toggle has to be used. - */ - if (etr_steai_available) { - if (eacr.p0 && !etr_port0_uptodate) { - etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0); - etr_port0_uptodate = 1; - } - if (eacr.p1 && !etr_port1_uptodate) { - etr_steai_cv(&etr_port1, ETR_STEAI_PORT_1); - etr_port1_uptodate = 1; - } - } else { - /* - * One port was updated above, if the other - * port is not uptodate toggle dp bit. - */ - if ((eacr.p0 && !etr_port0_uptodate) || - (eacr.p1 && !etr_port1_uptodate)) - eacr.dp ^= 1; - else - eacr.dp = 0; - } - return eacr; -} - -/* - * Write new etr control register if it differs from the current one. - * Return 1 if etr_tolec has been updated as well. - */ -static void etr_update_eacr(struct etr_eacr eacr) -{ - int dp_changed; - - if (memcmp(&etr_eacr, &eacr, sizeof(eacr)) == 0) - /* No change, return. */ - return; - /* - * The disable of an active port of the change of the data port - * bit can/will cause a change in the data port. - */ - dp_changed = etr_eacr.e0 > eacr.e0 || etr_eacr.e1 > eacr.e1 || - (etr_eacr.dp ^ eacr.dp) != 0; - etr_eacr = eacr; - etr_setr(&etr_eacr); - if (dp_changed) - etr_tolec = get_tod_clock(); -} - -/* - * ETR work. In this function you'll find the main logic. In - * particular this is the only function that calls etr_update_eacr(), - * it "controls" the etr control register. - */ -static void etr_work_fn(struct work_struct *work) -{ - unsigned long long now; - struct etr_eacr eacr; - struct etr_aib aib; - int sync_port; - - /* prevent multiple execution. */ - mutex_lock(&etr_work_mutex); - - /* Create working copy of etr_eacr. */ - eacr = etr_eacr; - - /* Check for the different events and their immediate effects. */ - eacr = etr_handle_events(eacr); - - /* Check if ETR is supposed to be active. */ - eacr.ea = eacr.p0 || eacr.p1; - if (!eacr.ea) { - /* Both ports offline. Reset everything. */ - eacr.dp = eacr.es = eacr.sl = 0; - on_each_cpu(disable_sync_clock, NULL, 1); - del_timer_sync(&etr_timer); - etr_update_eacr(eacr); - goto out_unlock; - } - - /* Store aib to get the current ETR status word. */ - BUG_ON(etr_stetr(&aib) != 0); - etr_port0.esw = etr_port1.esw = aib.esw; /* Copy status word. */ - now = get_tod_clock(); - - /* - * Update the port information if the last stepping port change - * or data port change is older than 1.6 seconds. - */ - if (now >= etr_tolec + (1600000 << 12)) - eacr = etr_handle_update(&aib, eacr); - - /* - * Select ports to enable. The preferred synchronization mode is PPS. - * If a port can be enabled depends on a number of things: - * 1) The port needs to be online and uptodate. A port is not - * disabled just because it is not uptodate, but it is only - * enabled if it is uptodate. - * 2) The port needs to have the same mode (pps / etr). - * 3) The port needs to be usable -> etr_port_valid() == 1 - * 4) To enable the second port the clock needs to be in sync. - * 5) If both ports are useable and are ETR ports, the network id - * has to be the same. - * The eacr.sl bit is used to indicate etr mode vs. pps mode. - */ - if (eacr.p0 && aib.esw.psc0 == etr_lpsc_pps_mode) { - eacr.sl = 0; - eacr.e0 = 1; - if (!etr_mode_is_pps(etr_eacr)) - eacr.es = 0; - if (!eacr.es || !eacr.p1 || aib.esw.psc1 != etr_lpsc_pps_mode) - eacr.e1 = 0; - // FIXME: uptodate checks ? - else if (etr_port0_uptodate && etr_port1_uptodate) - eacr.e1 = 1; - sync_port = (etr_port0_uptodate && - etr_port_valid(&etr_port0, 0)) ? 0 : -1; - } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) { - eacr.sl = 0; - eacr.e0 = 0; - eacr.e1 = 1; - if (!etr_mode_is_pps(etr_eacr)) - eacr.es = 0; - sync_port = (etr_port1_uptodate && - etr_port_valid(&etr_port1, 1)) ? 1 : -1; - } else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) { - eacr.sl = 1; - eacr.e0 = 1; - if (!etr_mode_is_etr(etr_eacr)) - eacr.es = 0; - if (!eacr.es || !eacr.p1 || - aib.esw.psc1 != etr_lpsc_operational_alt) - eacr.e1 = 0; - else if (etr_port0_uptodate && etr_port1_uptodate && - etr_compare_network(&etr_port0, &etr_port1)) - eacr.e1 = 1; - sync_port = (etr_port0_uptodate && - etr_port_valid(&etr_port0, 0)) ? 0 : -1; - } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) { - eacr.sl = 1; - eacr.e0 = 0; - eacr.e1 = 1; - if (!etr_mode_is_etr(etr_eacr)) - eacr.es = 0; - sync_port = (etr_port1_uptodate && - etr_port_valid(&etr_port1, 1)) ? 1 : -1; - } else { - /* Both ports not usable. */ - eacr.es = eacr.sl = 0; - sync_port = -1; - } - - /* - * If the clock is in sync just update the eacr and return. - * If there is no valid sync port wait for a port update. - */ - if ((eacr.es && check_sync_clock()) || sync_port < 0) { - etr_update_eacr(eacr); - etr_set_tolec_timeout(now); - goto out_unlock; - } - - /* - * Prepare control register for clock syncing - * (reset data port bit, set sync check control. - */ - eacr.dp = 0; - eacr.es = 1; - - /* - * Update eacr and try to synchronize the clock. If the update - * of eacr caused a stepping port switch (or if we have to - * assume that a stepping port switch has occurred) or the - * clock syncing failed, reset the sync check control bit - * and set up a timer to try again after 0.5 seconds - */ - etr_update_eacr(eacr); - if (now < etr_tolec + (1600000 << 12) || - etr_sync_clock_stop(&aib, sync_port) != 0) { - /* Sync failed. Try again in 1/2 second. */ - eacr.es = 0; - etr_update_eacr(eacr); - etr_set_sync_timeout(); - } else - etr_set_tolec_timeout(now); -out_unlock: - mutex_unlock(&etr_work_mutex); -} - -/* - * Sysfs interface functions - */ -static struct bus_type etr_subsys = { - .name = "etr", - .dev_name = "etr", -}; - -static struct device etr_port0_dev = { - .id = 0, - .bus = &etr_subsys, -}; - -static struct device etr_port1_dev = { - .id = 1, - .bus = &etr_subsys, -}; - -/* - * ETR subsys attributes - */ -static ssize_t etr_stepping_port_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%i\n", etr_port0.esw.p); -} - -static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); - -static ssize_t etr_stepping_mode_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - char *mode_str; - - if (etr_mode_is_pps(etr_eacr)) - mode_str = "pps"; - else if (etr_mode_is_etr(etr_eacr)) - mode_str = "etr"; - else - mode_str = "local"; - return sprintf(buf, "%s\n", mode_str); -} - -static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); - -/* - * ETR port attributes - */ -static inline struct etr_aib *etr_aib_from_dev(struct device *dev) -{ - if (dev == &etr_port0_dev) - return etr_port0_online ? &etr_port0 : NULL; - else - return etr_port1_online ? &etr_port1 : NULL; -} - -static ssize_t etr_online_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - unsigned int online; - - online = (dev == &etr_port0_dev) ? etr_port0_online : etr_port1_online; - return sprintf(buf, "%i\n", online); -} - -static ssize_t etr_online_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - unsigned int value; - - value = simple_strtoul(buf, NULL, 0); - if (value != 0 && value != 1) - return -EINVAL; - if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) - return -EOPNOTSUPP; - mutex_lock(&clock_sync_mutex); - if (dev == &etr_port0_dev) { - if (etr_port0_online == value) - goto out; /* Nothing to do. */ - etr_port0_online = value; - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - else - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } else { - if (etr_port1_online == value) - goto out; /* Nothing to do. */ - etr_port1_online = value; - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - else - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } -out: - mutex_unlock(&clock_sync_mutex); - return count; -} - -static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store); - -static ssize_t etr_stepping_control_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? - etr_eacr.e0 : etr_eacr.e1); -} - -static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); - -static ssize_t etr_mode_code_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - if (!etr_port0_online && !etr_port1_online) - /* Status word is not uptodate if both ports are offline. */ - return -ENODATA; - return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? - etr_port0.esw.psc0 : etr_port0.esw.psc1); -} - -static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL); - -static ssize_t etr_untuned_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.u); -} - -static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL); - -static ssize_t etr_network_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.net_id); -} - -static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL); - -static ssize_t etr_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.etr_id); -} - -static DEVICE_ATTR(id, 0400, etr_id_show, NULL); - -static ssize_t etr_port_number_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.etr_pn); -} - -static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL); - -static ssize_t etr_coupled_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.c); -} - -static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL); - -static ssize_t etr_local_time_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.blto); -} - -static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL); - -static ssize_t etr_utc_offset_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.buo); -} - -static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); - -static struct device_attribute *etr_port_attributes[] = { - &dev_attr_online, - &dev_attr_stepping_control, - &dev_attr_state_code, - &dev_attr_untuned, - &dev_attr_network, - &dev_attr_id, - &dev_attr_port, - &dev_attr_coupled, - &dev_attr_local_time, - &dev_attr_utc_offset, - NULL -}; - -static int __init etr_register_port(struct device *dev) -{ - struct device_attribute **attr; - int rc; - - rc = device_register(dev); - if (rc) - goto out; - for (attr = etr_port_attributes; *attr; attr++) { - rc = device_create_file(dev, *attr); - if (rc) - goto out_unreg; - } - return 0; -out_unreg: - for (; attr >= etr_port_attributes; attr--) - device_remove_file(dev, *attr); - device_unregister(dev); -out: - return rc; -} - -static void __init etr_unregister_port(struct device *dev) -{ - struct device_attribute **attr; - - for (attr = etr_port_attributes; *attr; attr++) - device_remove_file(dev, *attr); - device_unregister(dev); -} - -static int __init etr_init_sysfs(void) -{ - int rc; - - rc = subsys_system_register(&etr_subsys, NULL); - if (rc) - goto out; - rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port); - if (rc) - goto out_unreg_subsys; - rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode); - if (rc) - goto out_remove_stepping_port; - rc = etr_register_port(&etr_port0_dev); - if (rc) - goto out_remove_stepping_mode; - rc = etr_register_port(&etr_port1_dev); - if (rc) - goto out_remove_port0; - return 0; - -out_remove_port0: - etr_unregister_port(&etr_port0_dev); -out_remove_stepping_mode: - device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode); -out_remove_stepping_port: - device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port); -out_unreg_subsys: - bus_unregister(&etr_subsys); -out: - return rc; -} - -device_initcall(etr_init_sysfs); - -/* * Server Time Protocol (STP) code. */ static bool stp_online; @@ -1455,7 +506,7 @@ static void __init stp_reset(void) int rc; stp_page = (void *) get_zeroed_page(GFP_ATOMIC); - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { @@ -1533,6 +584,7 @@ static int stp_sync_clock(void *data) static int first; unsigned long long old_clock, delta, new_clock, clock_delta; struct clock_sync_data *stp_sync; + struct ptff_qto qto; int rc; stp_sync = data; @@ -1554,11 +606,14 @@ static int stp_sync_clock(void *data) stp_info.todoff[2] || stp_info.todoff[3] || stp_info.tmd != 2) { old_clock = get_tod_clock(); - rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0); + rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta); if (rc == 0) { - new_clock = get_tod_clock(); + new_clock = old_clock + clock_delta; delta = adjust_time(old_clock, new_clock, 0); - clock_delta = new_clock - old_clock; + if (ptff_query(PTFF_QTO) && + ptff(&qto, sizeof(qto), PTFF_QTO) == 0) + /* Update LPAR offset */ + lpar_offset = qto.tod_epoch_difference; atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &clock_delta); fixup_clock_comparator(delta); @@ -1590,12 +645,12 @@ static void stp_work_fn(struct work_struct *work) mutex_lock(&stp_work_mutex); if (!stp_online) { - chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); del_timer_sync(&stp_timer); goto out_unlock; } - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL); if (rc) goto out_unlock; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 64298a867589..e959c02e0cac 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -46,6 +46,7 @@ static DECLARE_WORK(topology_work, topology_work_fn); */ static struct mask_info socket_info; static struct mask_info book_info; +static struct mask_info drawer_info; DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology); EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology); @@ -79,10 +80,10 @@ static cpumask_t cpu_thread_map(unsigned int cpu) return mask; } -static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, - struct mask_info *book, - struct mask_info *socket, - int one_socket_per_cpu) +static void add_cpus_to_mask(struct topology_core *tl_core, + struct mask_info *drawer, + struct mask_info *book, + struct mask_info *socket) { struct cpu_topology_s390 *topo; unsigned int core; @@ -97,21 +98,17 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, continue; for (i = 0; i <= smp_cpu_mtid; i++) { topo = &per_cpu(cpu_topology, lcpu + i); + topo->drawer_id = drawer->id; topo->book_id = book->id; + topo->socket_id = socket->id; topo->core_id = rcore; topo->thread_id = lcpu + i; + cpumask_set_cpu(lcpu + i, &drawer->mask); cpumask_set_cpu(lcpu + i, &book->mask); cpumask_set_cpu(lcpu + i, &socket->mask); - if (one_socket_per_cpu) - topo->socket_id = rcore; - else - topo->socket_id = socket->id; smp_cpu_set_polarization(lcpu + i, tl_core->pp); } - if (one_socket_per_cpu) - socket = socket->next; } - return socket; } static void clear_masks(void) @@ -128,6 +125,11 @@ static void clear_masks(void) cpumask_clear(&info->mask); info = info->next; } + info = &drawer_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } } static union topology_entry *next_tle(union topology_entry *tle) @@ -137,16 +139,22 @@ static union topology_entry *next_tle(union topology_entry *tle) return (union topology_entry *)((struct topology_container *)tle + 1); } -static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) +static void tl_to_masks(struct sysinfo_15_1_x *info) { struct mask_info *socket = &socket_info; struct mask_info *book = &book_info; + struct mask_info *drawer = &drawer_info; union topology_entry *tle, *end; + clear_masks(); tle = info->tle; end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { + case 3: + drawer = drawer->next; + drawer->id = tle->container.id; + break; case 2: book = book->next; book->id = tle->container.id; @@ -156,32 +164,7 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) socket->id = tle->container.id; break; case 0: - add_cpus_to_mask(&tle->cpu, book, socket, 0); - break; - default: - clear_masks(); - return; - } - tle = next_tle(tle); - } -} - -static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) -{ - struct mask_info *socket = &socket_info; - struct mask_info *book = &book_info; - union topology_entry *tle, *end; - - tle = info->tle; - end = (union topology_entry *)((unsigned long)info + info->length); - while (tle < end) { - switch (tle->nl) { - case 1: - book = book->next; - book->id = tle->container.id; - break; - case 0: - socket = add_cpus_to_mask(&tle->cpu, book, socket, 1); + add_cpus_to_mask(&tle->cpu, drawer, book, socket); break; default: clear_masks(); @@ -191,22 +174,6 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) } } -static void tl_to_masks(struct sysinfo_15_1_x *info) -{ - struct cpuid cpu_id; - - get_cpu_id(&cpu_id); - clear_masks(); - switch (cpu_id.machine) { - case 0x2097: - case 0x2098: - __tl_to_masks_z10(info); - break; - default: - __tl_to_masks_generic(info); - } -} - static void topology_update_polarization_simple(void) { int cpu; @@ -257,11 +224,13 @@ static void update_cpu_masks(void) topo->thread_mask = cpu_thread_map(cpu); topo->core_mask = cpu_group_map(&socket_info, cpu); topo->book_mask = cpu_group_map(&book_info, cpu); + topo->drawer_mask = cpu_group_map(&drawer_info, cpu); if (!MACHINE_HAS_TOPOLOGY) { topo->thread_id = cpu; topo->core_id = cpu; topo->socket_id = cpu; topo->book_id = cpu; + topo->drawer_id = cpu; } } numa_update_cpu_topology(); @@ -269,10 +238,7 @@ static void update_cpu_masks(void) void store_topology(struct sysinfo_15_1_x *info) { - if (topology_max_mnest >= 3) - stsi(info, 15, 1, 3); - else - stsi(info, 15, 1, 2); + stsi(info, 15, 1, min(topology_max_mnest, 4)); } int arch_update_cpu_topology(void) @@ -442,6 +408,11 @@ static const struct cpumask *cpu_book_mask(int cpu) return &per_cpu(cpu_topology, cpu).book_mask; } +static const struct cpumask *cpu_drawer_mask(int cpu) +{ + return &per_cpu(cpu_topology, cpu).drawer_mask; +} + static int __init early_parse_topology(char *p) { return kstrtobool(p, &topology_enabled); @@ -452,6 +423,7 @@ static struct sched_domain_topology_level s390_topology[] = { { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, { cpu_book_mask, SD_INIT_NAME(BOOK) }, + { cpu_drawer_mask, SD_INIT_NAME(DRAWER) }, { cpu_cpu_mask, SD_INIT_NAME(DIE) }, { NULL, }, }; @@ -487,6 +459,7 @@ static int __init s390_topology_init(void) printk(KERN_CONT " / %d\n", info->mnest); alloc_masks(info, &socket_info, 1); alloc_masks(info, &book_info, 2); + alloc_masks(info, &drawer_info, 3); set_sched_topology(s390_topology); return 0; } diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index f9c459586649..68145456fee2 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -1,5 +1,7 @@ # List of files in the vdso, has to be asm only for now +KCOV_INSTRUMENT := n + obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o # Build rules diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 058659c1b8cf..0b0fd22c869a 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -1,5 +1,7 @@ # List of files in the vdso, has to be asm only for now +KCOV_INSTRUMENT := n + obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o # Build rules diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 0f41a8286378..429bfd111961 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -4,6 +4,16 @@ #include <asm/thread_info.h> #include <asm/page.h> + +/* + * Put .bss..swapper_pg_dir as the first thing in .bss. This will + * make sure it has 16k alignment. + */ +#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) + +/* Handle ro_after_init data on our own. */ +#define RO_AFTER_INIT_DATA + #include <asm-generic/vmlinux.lds.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") @@ -49,7 +59,14 @@ SECTIONS _eshared = .; /* End of shareable data */ _sdata = .; /* Start of data section */ - EXCEPTION_TABLE(16) :data + . = ALIGN(PAGE_SIZE); + __start_ro_after_init = .; + .data..ro_after_init : { + *(.data..ro_after_init) + } + EXCEPTION_TABLE(16) + . = ALIGN(PAGE_SIZE); + __end_ro_after_init = .; RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE) @@ -81,7 +98,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ - BSS_SECTION(0, 2, 0) + BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE) _end = . ; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 43f2a2b80490..6f5c344cd785 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -28,7 +28,7 @@ #include <linux/vmalloc.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> -#include <asm/etr.h> +#include <asm/stp.h> #include <asm/pgtable.h> #include <asm/gmap.h> #include <asm/nmi.h> diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index b647d5ff0ad9..e390bbb16443 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -236,6 +236,26 @@ char * strrchr(const char * s, int c) } EXPORT_SYMBOL(strrchr); +static inline int clcle(const char *s1, unsigned long l1, + const char *s2, unsigned long l2, + int *diff) +{ + register unsigned long r2 asm("2") = (unsigned long) s1; + register unsigned long r3 asm("3") = (unsigned long) l2; + register unsigned long r4 asm("4") = (unsigned long) s2; + register unsigned long r5 asm("5") = (unsigned long) l2; + int cc; + + asm volatile ("0: clcle %1,%3,0\n" + " jo 0b\n" + " ipm %0\n" + " srl %0,28" + : "=&d" (cc), "+a" (r2), "+a" (r3), + "+a" (r4), "+a" (r5) : : "cc"); + *diff = *(char *)r2 - *(char *)r4; + return cc; +} + /** * strstr - Find the first substring in a %NUL terminated string * @s1: The string to be searched @@ -250,18 +270,9 @@ char * strstr(const char * s1,const char * s2) return (char *) s1; l1 = __strend(s1) - s1; while (l1-- >= l2) { - register unsigned long r2 asm("2") = (unsigned long) s1; - register unsigned long r3 asm("3") = (unsigned long) l2; - register unsigned long r4 asm("4") = (unsigned long) s2; - register unsigned long r5 asm("5") = (unsigned long) l2; - int cc; - - asm volatile ("0: clcle %1,%3,0\n" - " jo 0b\n" - " ipm %0\n" - " srl %0,28" - : "=&d" (cc), "+a" (r2), "+a" (r3), - "+a" (r4), "+a" (r5) : : "cc" ); + int cc, dummy; + + cc = clcle(s1, l1, s2, l2, &dummy); if (!cc) return (char *) s1; s1++; @@ -302,20 +313,11 @@ EXPORT_SYMBOL(memchr); */ int memcmp(const void *cs, const void *ct, size_t n) { - register unsigned long r2 asm("2") = (unsigned long) cs; - register unsigned long r3 asm("3") = (unsigned long) n; - register unsigned long r4 asm("4") = (unsigned long) ct; - register unsigned long r5 asm("5") = (unsigned long) n; - int ret; + int ret, diff; - asm volatile ("0: clcle %1,%3,0\n" - " jo 0b\n" - " ipm %0\n" - " srl %0,28" - : "=&d" (ret), "+a" (r2), "+a" (r3), "+a" (r4), "+a" (r5) - : : "cc" ); + ret = clcle(cs, n, ct, n, &diff); if (ret) - ret = *(char *) r2 - *(char *) r4; + ret = diff; return ret; } EXPORT_SYMBOL(memcmp); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index ae4de559e3a0..d96596128e9f 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -49,7 +49,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr " jnm 5b\n" " ex %4,0(%3)\n" " j 8f\n" - "7:slgr %0,%0\n" + "7: slgr %0,%0\n" "8:\n" EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) @@ -93,7 +93,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, " jnm 6b\n" " ex %4,0(%3)\n" " j 9f\n" - "8:slgr %0,%0\n" + "8: slgr %0,%0\n" "9: sacf 768\n" EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b) EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b) @@ -266,7 +266,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" " slgr %0,%3\n" " j 5f\n" - "4:slgr %0,%0\n" + "4: slgr %0,%0\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 8556d6be9b54..861880df12c7 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pud = pud_offset(pgd, addr); if (!pud_none(*pud)) if (pud_large(*pud)) { - prot = pud_val(*pud) & _REGION3_ENTRY_RO; + prot = pud_val(*pud) & _REGION_ENTRY_PROTECT; note_page(m, st, prot, 2); } else walk_pmd_level(m, st, pud, addr); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 19288c1b36d3..6ad7eff84c82 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -624,7 +624,7 @@ void pfault_fini(void) diag_stat_inc(DIAG_STAT_X258); asm volatile( " diag %0,0,0x258\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : : "a" (&refbk), "m" (refbk) : "cc"); } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index cace818d86eb..063c721ec0dc 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL_GPL(gmap_alloc); static void gmap_flush_tlb(struct gmap *gmap) { if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); + __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); } @@ -124,7 +124,7 @@ void gmap_free(struct gmap *gmap) /* Flush tlb. */ if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); + __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); @@ -430,6 +430,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) VM_BUG_ON(pgd_none(*pgd)); pud = pud_offset(pgd, vmaddr); VM_BUG_ON(pud_none(*pud)); + /* large puds cannot yet be handled */ + if (pud_large(*pud)) + return -EFAULT; pmd = pmd_offset(pud, vmaddr); VM_BUG_ON(pmd_none(*pmd)); /* large pmds cannot yet be handled */ diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index a8a6765f1a51..adb0c34bf431 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -128,6 +128,44 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, return 1; } +static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + struct page *head, *page; + unsigned long mask; + int refs; + + mask = (write ? _REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID; + if ((pud_val(pud) & mask) != 0) + return 0; + VM_BUG_ON(!pfn_valid(pud_pfn(pud))); + + refs = 0; + head = pud_page(pud); + page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pud_val(pud) != pud_val(*pudp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + return 1; +} + static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -144,7 +182,12 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, next = pud_addr_end(addr, end); if (pud_none(pud)) return 0; - if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr)) + if (unlikely(pud_large(pud))) { + if (!gup_huge_pud(pudp, pud, addr, next, write, pages, + nr)) + return 0; + } else if (!gup_pmd_range(pudp, pud, addr, next, write, pages, + nr)) return 0; } while (pudp++, addr = next, addr != end); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 1b5e8983f4f3..e19d853883be 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -1,19 +1,22 @@ /* * IBM System z Huge TLB Page Support for Kernel. * - * Copyright IBM Corp. 2007 + * Copyright IBM Corp. 2007,2016 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> */ +#define KMSG_COMPONENT "hugetlb" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/mm.h> #include <linux/hugetlb.h> -static inline pmd_t __pte_to_pmd(pte_t pte) +static inline unsigned long __pte_to_rste(pte_t pte) { - pmd_t pmd; + unsigned long rste; /* - * Convert encoding pte bits pmd bits + * Convert encoding pte bits pmd / pud bits * lIR.uswrdy.p dy..R...I...wr * empty 010.000000.0 -> 00..0...1...00 * prot-none, clean, old 111.000000.1 -> 00..1...1...00 @@ -33,25 +36,31 @@ static inline pmd_t __pte_to_pmd(pte_t pte) * u unused, l large */ if (pte_present(pte)) { - pmd_val(pmd) = pte_val(pte) & PAGE_MASK; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT); - pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; + rste = pte_val(pte) & PAGE_MASK; + rste |= (pte_val(pte) & _PAGE_READ) >> 4; + rste |= (pte_val(pte) & _PAGE_WRITE) >> 4; + rste |= (pte_val(pte) & _PAGE_INVALID) >> 5; + rste |= (pte_val(pte) & _PAGE_PROTECT); + rste |= (pte_val(pte) & _PAGE_DIRTY) << 10; + rste |= (pte_val(pte) & _PAGE_YOUNG) << 10; + rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; } else - pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; - return pmd; + rste = _SEGMENT_ENTRY_INVALID; + return rste; } -static inline pte_t __pmd_to_pte(pmd_t pmd) +static inline pte_t __rste_to_pte(unsigned long rste) { + int present; pte_t pte; + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + present = pud_present(__pud(rste)); + else + present = pmd_present(__pmd(rste)); + /* - * Convert encoding pmd bits pte bits + * Convert encoding pmd / pud bits pte bits * dy..R...I...wr lIR.uswrdy.p * empty 00..0...1...00 -> 010.000000.0 * prot-none, clean, old 00..1...1...00 -> 111.000000.1 @@ -70,16 +79,16 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) * SW-bits: p present, y young, d dirty, r read, w write, s special, * u unused, l large */ - if (pmd_present(pmd)) { - pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; + if (present) { + pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT); - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT); + pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; } else pte_val(pte) = _PAGE_INVALID; return pte; @@ -88,27 +97,33 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - pmd_t pmd = __pte_to_pmd(pte); - - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - *(pmd_t *) ptep = pmd; + unsigned long rste = __pte_to_rste(pte); + + /* Set correct table type for 2G hugepages */ + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; + else + rste |= _SEGMENT_ENTRY_LARGE; + pte_val(*ptep) = rste; } pte_t huge_ptep_get(pte_t *ptep) { - pmd_t pmd = *(pmd_t *) ptep; - - return __pmd_to_pte(pmd); + return __rste_to_pte(pte_val(*ptep)); } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { + pte_t pte = huge_ptep_get(ptep); pmd_t *pmdp = (pmd_t *) ptep; - pmd_t old; + pud_t *pudp = (pud_t *) ptep; - old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); - return __pmd_to_pte(old); + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY)); + else + pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + return pte; } pte_t *huge_pte_alloc(struct mm_struct *mm, @@ -120,8 +135,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pgdp = pgd_offset(mm, addr); pudp = pud_alloc(mm, pgdp, addr); - if (pudp) - pmdp = pmd_alloc(mm, pudp, addr); + if (pudp) { + if (sz == PUD_SIZE) + return (pte_t *) pudp; + else if (sz == PMD_SIZE) + pmdp = pmd_alloc(mm, pudp, addr); + } return (pte_t *) pmdp; } @@ -134,8 +153,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pgdp = pgd_offset(mm, addr); if (pgd_present(*pgdp)) { pudp = pud_offset(pgdp, addr); - if (pud_present(*pudp)) + if (pud_present(*pudp)) { + if (pud_large(*pudp)) + return (pte_t *) pudp; pmdp = pmd_offset(pudp, addr); + } } return (pte_t *) pmdp; } @@ -147,5 +169,34 @@ int pmd_huge(pmd_t pmd) int pud_huge(pud_t pud) { - return 0; + return pud_large(pud); +} + +struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int flags) +{ + if (flags & FOLL_GET) + return NULL; + + return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long size; + char *string = opt; + + size = memparse(opt, &opt); + if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + pr_err("hugepagesz= specifies an unsupported page size %s\n", + string); + return 0; + } + return 1; } +__setup("hugepagesz=", setup_hugepagesz); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 2489b2e917c8..f56a39bd8ba6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -40,7 +40,7 @@ #include <asm/ctl_reg.h> #include <asm/sclp.h> -pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); +pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); @@ -111,17 +111,16 @@ void __init paging_init(void) void mark_rodata_ro(void) { - /* Text and rodata are already protected. Nothing to do here. */ - pr_info("Write protecting the kernel read-only data: %luk\n", - ((unsigned long)&_eshared - (unsigned long)&_stext) >> 10); + unsigned long size = __end_ro_after_init - __start_ro_after_init; + + set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT); + pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); } void __init mem_init(void) { - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - atomic_set(&init_mm.context.attach_count, 1); set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index a90d45e9dfb0..3330ea124eec 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -34,20 +34,25 @@ static int __init cmma(char *str) } __setup("cmma=", cmma); -void __init cmma_init(void) +static inline int cmma_test_essa(void) { register unsigned long tmp asm("0") = 0; register int rc asm("1") = -EOPNOTSUPP; - if (!cmma_flag) - return; asm volatile( " .insn rrf,0xb9ab0000,%1,%1,0,0\n" "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) : "+&d" (rc), "+&d" (tmp)); - if (rc) + return rc; +} + +void __init cmma_init(void) +{ + if (!cmma_flag) + return; + if (cmma_test_essa()) cmma_flag = 0; } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index f2a5c29a97e9..7104ffb5a67f 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -10,7 +10,6 @@ #include <asm/pgtable.h> #include <asm/page.h> -#if PAGE_DEFAULT_KEY static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) { asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0" @@ -22,6 +21,8 @@ void __storage_key_init_range(unsigned long start, unsigned long end) { unsigned long boundary, size; + if (!PAGE_DEFAULT_KEY) + return; while (start < end) { if (MACHINE_HAS_EDAT1) { /* set storage keys for a 1MB frame */ @@ -38,56 +39,254 @@ void __storage_key_init_range(unsigned long start, unsigned long end) start += PAGE_SIZE; } } -#endif -static pte_t *walk_page_table(unsigned long addr) +#ifdef CONFIG_PROC_FS +atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; + +void arch_report_meminfo(struct seq_file *m) { - pgd_t *pgdp; - pud_t *pudp; + seq_printf(m, "DirectMap4k: %8lu kB\n", + atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2); + seq_printf(m, "DirectMap1M: %8lu kB\n", + atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10); + seq_printf(m, "DirectMap2G: %8lu kB\n", + atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21); +} +#endif /* CONFIG_PROC_FS */ + +static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, + unsigned long dtt) +{ + unsigned long table, mask; + + mask = 0; + if (MACHINE_HAS_EDAT2) { + switch (dtt) { + case CRDTE_DTT_REGION3: + mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1); + break; + case CRDTE_DTT_SEGMENT: + mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); + break; + case CRDTE_DTT_PAGE: + mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1); + break; + } + table = (unsigned long)old & mask; + crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce); + } else if (MACHINE_HAS_IDTE) { + cspg(old, *old, new); + } else { + csp((unsigned int *)old + 1, *old, new); + } +} + +struct cpa { + unsigned int set_ro : 1; + unsigned int clear_ro : 1; +}; + +static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, + struct cpa cpa) +{ + pte_t *ptep, new; + + ptep = pte_offset(pmdp, addr); + do { + if (pte_none(*ptep)) + return -EINVAL; + if (cpa.set_ro) + new = pte_wrprotect(*ptep); + else if (cpa.clear_ro) + new = pte_mkwrite(pte_mkdirty(*ptep)); + pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); + ptep++; + addr += PAGE_SIZE; + cond_resched(); + } while (addr < end); + return 0; +} + +static int split_pmd_page(pmd_t *pmdp, unsigned long addr) +{ + unsigned long pte_addr, prot; + pte_t *pt_dir, *ptep; + pmd_t new; + int i, ro; + + pt_dir = vmem_pte_alloc(); + if (!pt_dir) + return -ENOMEM; + pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT; + ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT); + prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); + ptep = pt_dir; + for (i = 0; i < PTRS_PER_PTE; i++) { + pte_val(*ptep) = pte_addr | prot; + pte_addr += PAGE_SIZE; + ptep++; + } + pmd_val(new) = __pa(pt_dir) | _SEGMENT_ENTRY; + pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); + update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE); + update_page_count(PG_DIRECT_MAP_1M, -1); + return 0; +} + +static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, struct cpa cpa) +{ + pmd_t new; + + if (cpa.set_ro) + new = pmd_wrprotect(*pmdp); + else if (cpa.clear_ro) + new = pmd_mkwrite(pmd_mkdirty(*pmdp)); + pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); +} + +static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end, + struct cpa cpa) +{ + unsigned long next; pmd_t *pmdp; - pte_t *ptep; + int rc = 0; - pgdp = pgd_offset_k(addr); - if (pgd_none(*pgdp)) - return NULL; - pudp = pud_offset(pgdp, addr); - if (pud_none(*pudp) || pud_large(*pudp)) - return NULL; pmdp = pmd_offset(pudp, addr); - if (pmd_none(*pmdp) || pmd_large(*pmdp)) - return NULL; - ptep = pte_offset_kernel(pmdp, addr); - if (pte_none(*ptep)) - return NULL; - return ptep; + do { + if (pmd_none(*pmdp)) + return -EINVAL; + next = pmd_addr_end(addr, end); + if (pmd_large(*pmdp)) { + if (addr & ~PMD_MASK || addr + PMD_SIZE > next) { + rc = split_pmd_page(pmdp, addr); + if (rc) + return rc; + continue; + } + modify_pmd_page(pmdp, addr, cpa); + } else { + rc = walk_pte_level(pmdp, addr, next, cpa); + if (rc) + return rc; + } + pmdp++; + addr = next; + cond_resched(); + } while (addr < end); + return rc; } -static void change_page_attr(unsigned long addr, int numpages, - pte_t (*set) (pte_t)) +static int split_pud_page(pud_t *pudp, unsigned long addr) { - pte_t *ptep; - int i; + unsigned long pmd_addr, prot; + pmd_t *pm_dir, *pmdp; + pud_t new; + int i, ro; - for (i = 0; i < numpages; i++) { - ptep = walk_page_table(addr); - if (WARN_ON_ONCE(!ptep)) - break; - *ptep = set(*ptep); - addr += PAGE_SIZE; + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + return -ENOMEM; + pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT; + ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT); + prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL); + pmdp = pm_dir; + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd_val(*pmdp) = pmd_addr | prot; + pmd_addr += PMD_SIZE; + pmdp++; } - __tlb_flush_kernel(); + pud_val(new) = __pa(pm_dir) | _REGION3_ENTRY; + pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); + update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD); + update_page_count(PG_DIRECT_MAP_2G, -1); + return 0; +} + +static void modify_pud_page(pud_t *pudp, unsigned long addr, struct cpa cpa) +{ + pud_t new; + + if (cpa.set_ro) + new = pud_wrprotect(*pudp); + else if (cpa.clear_ro) + new = pud_mkwrite(pud_mkdirty(*pudp)); + pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); +} + +static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end, + struct cpa cpa) +{ + unsigned long next; + pud_t *pudp; + int rc = 0; + + pudp = pud_offset(pgd, addr); + do { + if (pud_none(*pudp)) + return -EINVAL; + next = pud_addr_end(addr, end); + if (pud_large(*pudp)) { + if (addr & ~PUD_MASK || addr + PUD_SIZE > next) { + rc = split_pud_page(pudp, addr); + if (rc) + break; + continue; + } + modify_pud_page(pudp, addr, cpa); + } else { + rc = walk_pmd_level(pudp, addr, next, cpa); + } + pudp++; + addr = next; + cond_resched(); + } while (addr < end && !rc); + return rc; +} + +static DEFINE_MUTEX(cpa_mutex); + +static int change_page_attr(unsigned long addr, unsigned long end, + struct cpa cpa) +{ + unsigned long next; + int rc = -EINVAL; + pgd_t *pgdp; + + if (end >= MODULES_END) + return -EINVAL; + mutex_lock(&cpa_mutex); + pgdp = pgd_offset_k(addr); + do { + if (pgd_none(*pgdp)) + break; + next = pgd_addr_end(addr, end); + rc = walk_pud_level(pgdp, addr, next, cpa); + if (rc) + break; + cond_resched(); + } while (pgdp++, addr = next, addr < end && !rc); + mutex_unlock(&cpa_mutex); + return rc; } int set_memory_ro(unsigned long addr, int numpages) { - change_page_attr(addr, numpages, pte_wrprotect); - return 0; + struct cpa cpa = { + .set_ro = 1, + }; + + addr &= PAGE_MASK; + return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa); } int set_memory_rw(unsigned long addr, int numpages) { - change_page_attr(addr, numpages, pte_mkwrite); - return 0; + struct cpa cpa = { + .clear_ro = 1, + }; + + addr &= PAGE_MASK; + return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa); } /* not possible */ @@ -138,7 +337,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) nr = min(numpages - i, nr); if (enable) { for (j = 0; j < nr; j++) { - pte_val(*pte) = __pa(address); + pte_val(*pte) = address | pgprot_val(PAGE_KERNEL); address += PAGE_SIZE; pte++; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 9f0ce0e6eeb4..b98d1a152d46 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -27,40 +27,37 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - int active, count; pte_t old; old = *ptep; if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) __ptep_ipte_local(addr, ptep); else __ptep_ipte(addr, ptep); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } static inline pte_t ptep_flush_lazy(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - int active, count; pte_t old; old = *ptep; if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { + atomic_inc(&mm->context.flush_count); + if (cpumask_equal(&mm->context.cpu_attach_mask, + cpumask_of(smp_processor_id()))) { pte_val(*ptep) |= _PAGE_INVALID; mm->context.flush_mm = 1; } else __ptep_ipte(addr, ptep); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } @@ -70,7 +67,6 @@ static inline pgste_t pgste_get_lock(pte_t *ptep) #ifdef CONFIG_PGSTE unsigned long old; - preempt_disable(); asm( " lg %0,%2\n" "0: lgr %1,%0\n" @@ -93,7 +89,6 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) : "=Q" (ptep[PTRS_PER_PTE]) : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); - preempt_enable(); #endif } @@ -230,9 +225,11 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, pgste_t pgste; pte_t old; + preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_direct(mm, addr, ptep); ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + preempt_enable(); return old; } EXPORT_SYMBOL(ptep_xchg_direct); @@ -243,9 +240,11 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, pgste_t pgste; pte_t old; + preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_lazy(mm, addr, ptep); ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + preempt_enable(); return old; } EXPORT_SYMBOL(ptep_xchg_lazy); @@ -256,6 +255,7 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pgste_t pgste; pte_t old; + preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_lazy(mm, addr, ptep); if (mm_has_pgste(mm)) { @@ -279,13 +279,13 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, } else { *ptep = pte; } + preempt_enable(); } EXPORT_SYMBOL(ptep_modify_prot_commit); static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - int active, count; pmd_t old; old = *pmdp; @@ -295,36 +295,34 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, __pmdp_csp(pmdp); return old; } - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) __pmdp_idte_local(addr, pmdp); else __pmdp_idte(addr, pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - int active, count; pmd_t old; old = *pmdp; if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { + atomic_inc(&mm->context.flush_count); + if (cpumask_equal(&mm->context.cpu_attach_mask, + cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; } else if (MACHINE_HAS_IDTE) __pmdp_idte(addr, pmdp); else __pmdp_csp(pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } @@ -333,8 +331,10 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, { pmd_t old; + preempt_disable(); old = pmdp_flush_direct(mm, addr, pmdp); *pmdp = new; + preempt_enable(); return old; } EXPORT_SYMBOL(pmdp_xchg_direct); @@ -344,12 +344,53 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, { pmd_t old; + preempt_disable(); old = pmdp_flush_lazy(mm, addr, pmdp); *pmdp = new; + preempt_enable(); return old; } EXPORT_SYMBOL(pmdp_xchg_lazy); +static inline pud_t pudp_flush_direct(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + pud_t old; + + old = *pudp; + if (pud_val(old) & _REGION_ENTRY_INVALID) + return old; + if (!MACHINE_HAS_IDTE) { + /* + * Invalid bit position is the same for pmd and pud, so we can + * re-use _pmd_csp() here + */ + __pmdp_csp((pmd_t *) pudp); + return old; + } + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __pudp_idte_local(addr, pudp); + else + __pudp_idte(addr, pudp); + atomic_dec(&mm->context.flush_count); + return old; +} + +pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t new) +{ + pud_t old; + + preempt_disable(); + old = pudp_flush_direct(mm, addr, pudp); + *pudp = new; + preempt_enable(); + return old; +} +EXPORT_SYMBOL(pudp_xchg_direct); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) @@ -398,20 +439,24 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, pgste_t pgste; /* the mm_has_pgste() check is done in set_pte_at() */ + preempt_disable(); pgste = pgste_get_lock(ptep); pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; pgste_set_key(ptep, pgste, entry, mm); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); + preempt_enable(); } void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pgste_t pgste; + preempt_disable(); pgste = pgste_get_lock(ptep); pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); + preempt_enable(); } static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) @@ -434,6 +479,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_t pte; /* Zap unused and logically-zero pages */ + preempt_disable(); pgste = pgste_get_lock(ptep); pgstev = pgste_val(pgste); pte = *ptep; @@ -446,6 +492,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, if (reset) pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; pgste_set_unlock(ptep, pgste); + preempt_enable(); } void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -454,6 +501,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) pgste_t pgste; /* Clear storage key */ + preempt_disable(); pgste = pgste_get_lock(ptep); pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT); @@ -461,6 +509,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); pgste_set_unlock(ptep, pgste); + preempt_enable(); } /* diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index d48cf25cfe99..1848292766ef 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -11,6 +11,7 @@ #include <linux/hugetlb.h> #include <linux/slab.h> #include <linux/memblock.h> +#include <asm/cacheflush.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/setup.h> @@ -29,9 +30,11 @@ static LIST_HEAD(mem_segs); static void __ref *vmem_alloc_pages(unsigned int order) { + unsigned long size = PAGE_SIZE << order; + if (slab_is_available()) return (void *)__get_free_pages(GFP_KERNEL, order); - return alloc_bootmem_pages((1 << order) * PAGE_SIZE); + return alloc_bootmem_align(size, size); } static inline pud_t *vmem_pud_alloc(void) @@ -45,7 +48,7 @@ static inline pud_t *vmem_pud_alloc(void) return pud; } -static inline pmd_t *vmem_pmd_alloc(void) +pmd_t *vmem_pmd_alloc(void) { pmd_t *pmd = NULL; @@ -56,7 +59,7 @@ static inline pmd_t *vmem_pmd_alloc(void) return pmd; } -static pte_t __ref *vmem_pte_alloc(void) +pte_t __ref *vmem_pte_alloc(void) { pte_t *pte; @@ -75,8 +78,9 @@ static pte_t __ref *vmem_pte_alloc(void) /* * Add a physical memory range to the 1:1 mapping. */ -static int vmem_add_mem(unsigned long start, unsigned long size, int ro) +static int vmem_add_mem(unsigned long start, unsigned long size) { + unsigned long pages4k, pages1m, pages2g; unsigned long end = start + size; unsigned long address = start; pgd_t *pg_dir; @@ -85,6 +89,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pte_t *pt_dir; int ret = -ENOMEM; + pages4k = pages1m = pages2g = 0; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -97,10 +102,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && !debug_pagealloc_enabled()) { - pud_val(*pu_dir) = __pa(address) | - _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | - (ro ? _REGION_ENTRY_PROTECT : 0); + pud_val(*pu_dir) = address | pgprot_val(REGION3_KERNEL); address += PUD_SIZE; + pages2g++; continue; } if (pud_none(*pu_dir)) { @@ -113,11 +117,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) && !debug_pagealloc_enabled()) { - pmd_val(*pm_dir) = __pa(address) | - _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | - _SEGMENT_ENTRY_YOUNG | - (ro ? _SEGMENT_ENTRY_PROTECT : 0); + pmd_val(*pm_dir) = address | pgprot_val(SEGMENT_KERNEL); address += PMD_SIZE; + pages1m++; continue; } if (pmd_none(*pm_dir)) { @@ -128,12 +130,15 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) } pt_dir = pte_offset_kernel(pm_dir, address); - pte_val(*pt_dir) = __pa(address) | - pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); + pte_val(*pt_dir) = address | pgprot_val(PAGE_KERNEL); address += PAGE_SIZE; + pages4k++; } ret = 0; out: + update_page_count(PG_DIRECT_MAP_4K, pages4k); + update_page_count(PG_DIRECT_MAP_1M, pages1m); + update_page_count(PG_DIRECT_MAP_2G, pages2g); return ret; } @@ -143,15 +148,15 @@ out: */ static void vmem_remove_range(unsigned long start, unsigned long size) { + unsigned long pages4k, pages1m, pages2g; unsigned long end = start + size; unsigned long address = start; pgd_t *pg_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; - pte_t pte; - pte_val(pte) = _PAGE_INVALID; + pages4k = pages1m = pages2g = 0; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -166,6 +171,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) if (pud_large(*pu_dir)) { pud_clear(pu_dir); address += PUD_SIZE; + pages2g++; continue; } pm_dir = pmd_offset(pu_dir, address); @@ -176,13 +182,18 @@ static void vmem_remove_range(unsigned long start, unsigned long size) if (pmd_large(*pm_dir)) { pmd_clear(pm_dir); address += PMD_SIZE; + pages1m++; continue; } pt_dir = pte_offset_kernel(pm_dir, address); - *pt_dir = pte; + pte_clear(&init_mm, address, pt_dir); address += PAGE_SIZE; + pages4k++; } flush_tlb_kernel_range(start, end); + update_page_count(PG_DIRECT_MAP_4K, -pages4k); + update_page_count(PG_DIRECT_MAP_1M, -pages1m); + update_page_count(PG_DIRECT_MAP_2G, -pages2g); } /* @@ -341,7 +352,7 @@ int vmem_add_mapping(unsigned long start, unsigned long size) if (ret) goto out_free; - ret = vmem_add_mem(start, size, 0); + ret = vmem_add_mem(start, size); if (ret) goto out_remove; goto out; @@ -362,31 +373,13 @@ out: */ void __init vmem_map_init(void) { - unsigned long ro_start, ro_end; + unsigned long size = _eshared - _stext; struct memblock_region *reg; - phys_addr_t start, end; - ro_start = PFN_ALIGN((unsigned long)&_stext); - ro_end = (unsigned long)&_eshared & PAGE_MASK; - for_each_memblock(memory, reg) { - start = reg->base; - end = reg->base + reg->size; - if (start >= ro_end || end <= ro_start) - vmem_add_mem(start, end - start, 0); - else if (start >= ro_start && end <= ro_end) - vmem_add_mem(start, end - start, 1); - else if (start >= ro_start) { - vmem_add_mem(start, ro_end - start, 1); - vmem_add_mem(ro_end, end - ro_end, 0); - } else if (end < ro_end) { - vmem_add_mem(start, ro_start - start, 0); - vmem_add_mem(ro_start, end - ro_start, 1); - } else { - vmem_add_mem(start, ro_start - start, 0); - vmem_add_mem(ro_start, ro_end - ro_start, 1); - vmem_add_mem(ro_end, end - ro_end, 0); - } - } + for_each_memblock(memory, reg) + vmem_add_mem(reg->base, reg->size); + set_memory_ro((unsigned long)_stext, size >> PAGE_SHIFT); + pr_info("Write protected kernel read-only data: %luk\n", size >> 10); } /* diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c index 828d0695d0d4..fbc394e16b2c 100644 --- a/arch/s390/numa/mode_emu.c +++ b/arch/s390/numa/mode_emu.c @@ -34,7 +34,8 @@ #define DIST_CORE 1 #define DIST_MC 2 #define DIST_BOOK 3 -#define DIST_MAX 4 +#define DIST_DRAWER 4 +#define DIST_MAX 5 /* Node distance reported to common code */ #define EMU_NODE_DIST 10 @@ -43,7 +44,7 @@ #define NODE_ID_FREE -1 /* Different levels of toptree */ -enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY}; +enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY}; /* The two toptree IDs */ enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA}; @@ -114,6 +115,14 @@ static int cores_free(struct toptree *tree) */ static struct toptree *core_node(struct toptree *core) { + return core->parent->parent->parent->parent; +} + +/* + * Return drawer of core + */ +static struct toptree *core_drawer(struct toptree *core) +{ return core->parent->parent->parent; } @@ -138,6 +147,8 @@ static struct toptree *core_mc(struct toptree *core) */ static int dist_core_to_core(struct toptree *core1, struct toptree *core2) { + if (core_drawer(core1)->id != core_drawer(core2)->id) + return DIST_DRAWER; if (core_book(core1)->id != core_book(core2)->id) return DIST_BOOK; if (core_mc(core1)->id != core_mc(core2)->id) @@ -262,6 +273,8 @@ static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys) struct toptree *core; /* Always try to move perfectly fitting structures first */ + move_level_to_numa(numa, phys, DRAWER, true); + move_level_to_numa(numa, phys, DRAWER, false); move_level_to_numa(numa, phys, BOOK, true); move_level_to_numa(numa, phys, BOOK, false); move_level_to_numa(numa, phys, MC, true); @@ -335,7 +348,7 @@ static struct toptree *toptree_to_numa(struct toptree *phys) */ static struct toptree *toptree_from_topology(void) { - struct toptree *phys, *node, *book, *mc, *core; + struct toptree *phys, *node, *drawer, *book, *mc, *core; struct cpu_topology_s390 *top; int cpu; @@ -344,10 +357,11 @@ static struct toptree *toptree_from_topology(void) for_each_online_cpu(cpu) { top = &per_cpu(cpu_topology, cpu); node = toptree_get_child(phys, 0); - book = toptree_get_child(node, top->book_id); + drawer = toptree_get_child(node, top->drawer_id); + book = toptree_get_child(drawer, top->book_id); mc = toptree_get_child(book, top->socket_id); core = toptree_get_child(mc, top->core_id); - if (!book || !mc || !core) + if (!drawer || !book || !mc || !core) panic("NUMA emulation could not allocate memory"); cpumask_set_cpu(cpu, &core->mask); toptree_update_mask(mc); @@ -368,6 +382,7 @@ static void topology_add_core(struct toptree *core) cpumask_copy(&top->thread_mask, &core->mask); cpumask_copy(&top->core_mask, &core_mc(core)->mask); cpumask_copy(&top->book_mask, &core_book(core)->mask); + cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask); cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]); top->node_id = core_node(core)->id; } diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 496e4a7ee00e..e9dd41b0b8d3 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -7,4 +7,3 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ timer_int.o ) oprofile-y := $(DRIVER_OBJS) init.o -oprofile-y += hwsampler.o diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c deleted file mode 100644 index ff9b4eb34589..000000000000 --- a/arch/s390/oprofile/hwsampler.c +++ /dev/null @@ -1,1178 +0,0 @@ -/* - * Copyright IBM Corp. 2010 - * Author: Heinz Graalfs <graalfs@de.ibm.com> - */ - -#include <linux/kernel_stat.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/smp.h> -#include <linux/errno.h> -#include <linux/workqueue.h> -#include <linux/interrupt.h> -#include <linux/notifier.h> -#include <linux/cpu.h> -#include <linux/semaphore.h> -#include <linux/oom.h> -#include <linux/oprofile.h> - -#include <asm/facility.h> -#include <asm/cpu_mf.h> -#include <asm/irq.h> - -#include "hwsampler.h" -#include "op_counter.h" - -#define MAX_NUM_SDB 511 -#define MIN_NUM_SDB 1 - -DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); - -struct hws_execute_parms { - void *buffer; - signed int rc; -}; - -DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); -EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer); - -static DEFINE_MUTEX(hws_sem); -static DEFINE_MUTEX(hws_sem_oom); - -static unsigned char hws_flush_all; -static unsigned int hws_oom; -static unsigned int hws_alert; -static struct workqueue_struct *hws_wq; - -static unsigned int hws_state; -enum { - HWS_INIT = 1, - HWS_DEALLOCATED, - HWS_STOPPED, - HWS_STARTED, - HWS_STOPPING }; - -/* set to 1 if called by kernel during memory allocation */ -static unsigned char oom_killer_was_active; -/* size of SDBT and SDB as of allocate API */ -static unsigned long num_sdbt = 100; -static unsigned long num_sdb = 511; -/* sampling interval (machine cycles) */ -static unsigned long interval; - -static unsigned long min_sampler_rate; -static unsigned long max_sampler_rate; - -static void execute_qsi(void *parms) -{ - struct hws_execute_parms *ep = parms; - - ep->rc = qsi(ep->buffer); -} - -static void execute_ssctl(void *parms) -{ - struct hws_execute_parms *ep = parms; - - ep->rc = lsctl(ep->buffer); -} - -static int smp_ctl_ssctl_stop(int cpu) -{ - int rc; - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - cb->ssctl.es = 0; - cb->ssctl.cs = 0; - - ep.buffer = &cb->ssctl; - smp_call_function_single(cpu, execute_ssctl, &ep, 1); - rc = ep.rc; - if (rc) { - printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); - dump_stack(); - } - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - - if (cb->qsi.es || cb->qsi.cs) { - printk(KERN_EMERG "CPUMF sampling did not stop properly.\n"); - dump_stack(); - } - - return rc; -} - -static int smp_ctl_ssctl_deactivate(int cpu) -{ - int rc; - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - cb->ssctl.es = 1; - cb->ssctl.cs = 0; - - ep.buffer = &cb->ssctl; - smp_call_function_single(cpu, execute_ssctl, &ep, 1); - rc = ep.rc; - if (rc) - printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - - if (cb->qsi.cs) - printk(KERN_EMERG "CPUMF sampling was not set inactive.\n"); - - return rc; -} - -static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval) -{ - int rc; - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - cb->ssctl.h = 1; - cb->ssctl.tear = cb->first_sdbt; - cb->ssctl.dear = *(unsigned long *) cb->first_sdbt; - cb->ssctl.interval = interval; - cb->ssctl.es = 1; - cb->ssctl.cs = 1; - - ep.buffer = &cb->ssctl; - smp_call_function_single(cpu, execute_ssctl, &ep, 1); - rc = ep.rc; - if (rc) - printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - if (ep.rc) - printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu); - - return rc; -} - -static int smp_ctl_qsi(int cpu) -{ - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - - return ep.rc; -} - -static void hws_ext_handler(struct ext_code ext_code, - unsigned int param32, unsigned long param64) -{ - struct hws_cpu_buffer *cb = this_cpu_ptr(&sampler_cpu_buffer); - - if (!(param32 & CPU_MF_INT_SF_MASK)) - return; - - if (!hws_alert) - return; - - inc_irq_stat(IRQEXT_CMS); - atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); - - if (hws_wq) - queue_work(hws_wq, &cb->worker); -} - -static void worker(struct work_struct *work); - -static void add_samples_to_oprofile(unsigned cpu, unsigned long *, - unsigned long *dear); - -static void init_all_cpu_buffers(void) -{ - int cpu; - struct hws_cpu_buffer *cb; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - memset(cb, 0, sizeof(struct hws_cpu_buffer)); - } -} - -static void prepare_cpu_buffers(void) -{ - struct hws_cpu_buffer *cb; - int cpu; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - atomic_set(&cb->ext_params, 0); - cb->worker_entry = 0; - cb->sample_overflow = 0; - cb->req_alert = 0; - cb->incorrect_sdbt_entry = 0; - cb->invalid_entry_address = 0; - cb->loss_of_sample_data = 0; - cb->sample_auth_change_alert = 0; - cb->finish = 0; - cb->oom = 0; - cb->stop_mode = 0; - } -} - -/* - * allocate_sdbt() - allocate sampler memory - * @cpu: the cpu for which sampler memory is allocated - * - * A 4K page is allocated for each requested SDBT. - * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs. - * Set ALERT_REQ mask in each SDBs trailer. - * Returns zero if successful, <0 otherwise. - */ -static int allocate_sdbt(int cpu) -{ - int j, k, rc; - unsigned long *sdbt; - unsigned long sdb; - unsigned long *tail; - unsigned long *trailer; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (cb->first_sdbt) - return -EINVAL; - - sdbt = NULL; - tail = sdbt; - - for (j = 0; j < num_sdbt; j++) { - sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); - - mutex_lock(&hws_sem_oom); - /* OOM killer might have been activated */ - barrier(); - if (oom_killer_was_active || !sdbt) { - if (sdbt) - free_page((unsigned long)sdbt); - - goto allocate_sdbt_error; - } - if (cb->first_sdbt == 0) - cb->first_sdbt = (unsigned long)sdbt; - - /* link current page to tail of chain */ - if (tail) - *tail = (unsigned long)(void *)sdbt + 1; - - mutex_unlock(&hws_sem_oom); - - for (k = 0; k < num_sdb; k++) { - /* get and set SDB page */ - sdb = get_zeroed_page(GFP_KERNEL); - - mutex_lock(&hws_sem_oom); - /* OOM killer might have been activated */ - barrier(); - if (oom_killer_was_active || !sdb) { - if (sdb) - free_page(sdb); - - goto allocate_sdbt_error; - } - *sdbt = sdb; - trailer = trailer_entry_ptr(*sdbt); - *trailer = SDB_TE_ALERT_REQ_MASK; - sdbt++; - mutex_unlock(&hws_sem_oom); - } - tail = sdbt; - } - mutex_lock(&hws_sem_oom); - if (oom_killer_was_active) - goto allocate_sdbt_error; - - rc = 0; - if (tail) - *tail = (unsigned long) - ((void *)cb->first_sdbt) + 1; - -allocate_sdbt_exit: - mutex_unlock(&hws_sem_oom); - return rc; - -allocate_sdbt_error: - rc = -ENOMEM; - goto allocate_sdbt_exit; -} - -/* - * deallocate_sdbt() - deallocate all sampler memory - * - * For each online CPU all SDBT trees are deallocated. - * Returns the number of freed pages. - */ -static int deallocate_sdbt(void) -{ - int cpu; - int counter; - - counter = 0; - - for_each_online_cpu(cpu) { - unsigned long start; - unsigned long sdbt; - unsigned long *curr; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (!cb->first_sdbt) - continue; - - sdbt = cb->first_sdbt; - curr = (unsigned long *) sdbt; - start = sdbt; - - /* we'll free the SDBT after all SDBs are processed... */ - while (1) { - if (!*curr || !sdbt) - break; - - /* watch for link entry reset if found */ - if (is_link_entry(curr)) { - curr = get_next_sdbt(curr); - if (sdbt) - free_page(sdbt); - - /* we are done if we reach the start */ - if ((unsigned long) curr == start) - break; - else - sdbt = (unsigned long) curr; - } else { - /* process SDB pointer */ - if (*curr) { - free_page(*curr); - curr++; - } - } - counter++; - } - cb->first_sdbt = 0; - } - return counter; -} - -static int start_sampling(int cpu) -{ - int rc; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - rc = smp_ctl_ssctl_enable_activate(cpu, interval); - if (rc) { - printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu); - goto start_exit; - } - - rc = -EINVAL; - if (!cb->qsi.es) { - printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu); - goto start_exit; - } - - if (!cb->qsi.cs) { - printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu); - goto start_exit; - } - - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n", - cpu, interval); - - rc = 0; - -start_exit: - return rc; -} - -static int stop_sampling(int cpu) -{ - unsigned long v; - int rc; - struct hws_cpu_buffer *cb; - - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - - cb = &per_cpu(sampler_cpu_buffer, cpu); - if (!rc && !cb->qsi.es) - printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu); - - rc = smp_ctl_ssctl_stop(cpu); - if (rc) { - printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n", - cpu, rc); - goto stop_exit; - } - - printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu); - -stop_exit: - v = cb->req_alert; - if (v) - printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert," - " count=%lu.\n", cpu, v); - - v = cb->loss_of_sample_data; - if (v) - printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data," - " count=%lu.\n", cpu, v); - - v = cb->invalid_entry_address; - if (v) - printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address," - " count=%lu.\n", cpu, v); - - v = cb->incorrect_sdbt_entry; - if (v) - printk(KERN_ERR - "hwsampler: CPU %d CPUMF Incorrect SDBT address," - " count=%lu.\n", cpu, v); - - v = cb->sample_auth_change_alert; - if (v) - printk(KERN_ERR - "hwsampler: CPU %d CPUMF Sample authorization change," - " count=%lu.\n", cpu, v); - - return rc; -} - -static int check_hardware_prerequisites(void) -{ - if (!test_facility(68)) - return -EOPNOTSUPP; - return 0; -} -/* - * hws_oom_callback() - the OOM callback function - * - * In case the callback is invoked during memory allocation for the - * hw sampler, all obtained memory is deallocated and a flag is set - * so main sampler memory allocation can exit with a failure code. - * In case the callback is invoked during sampling the hw sampler - * is deactivated for all CPUs. - */ -static int hws_oom_callback(struct notifier_block *nfb, - unsigned long dummy, void *parm) -{ - unsigned long *freed; - int cpu; - struct hws_cpu_buffer *cb; - - freed = parm; - - mutex_lock(&hws_sem_oom); - - if (hws_state == HWS_DEALLOCATED) { - /* during memory allocation */ - if (oom_killer_was_active == 0) { - oom_killer_was_active = 1; - *freed += deallocate_sdbt(); - } - } else { - int i; - cpu = get_cpu(); - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (!cb->oom) { - for_each_online_cpu(i) { - smp_ctl_ssctl_deactivate(i); - cb->oom = 1; - } - cb->finish = 1; - - printk(KERN_INFO - "hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n", - cpu); - } - } - - mutex_unlock(&hws_sem_oom); - - return NOTIFY_OK; -} - -static struct notifier_block hws_oom_notifier = { - .notifier_call = hws_oom_callback -}; - -static int hws_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - /* We do not have sampler space available for all possible CPUs. - All CPUs should be online when hw sampling is activated. */ - return (hws_state <= HWS_DEALLOCATED) ? NOTIFY_OK : NOTIFY_BAD; -} - -static struct notifier_block hws_cpu_notifier = { - .notifier_call = hws_cpu_callback -}; - -/** - * hwsampler_deactivate() - set hardware sampling temporarily inactive - * @cpu: specifies the CPU to be set inactive. - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_deactivate(unsigned int cpu) -{ - /* - * Deactivate hw sampling temporarily and flush the buffer - * by pushing all the pending samples to oprofile buffer. - * - * This function can be called under one of the following conditions: - * Memory unmap, task is exiting. - */ - int rc; - struct hws_cpu_buffer *cb; - - rc = 0; - mutex_lock(&hws_sem); - - cb = &per_cpu(sampler_cpu_buffer, cpu); - if (hws_state == HWS_STARTED) { - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (cb->qsi.cs) { - rc = smp_ctl_ssctl_deactivate(cpu); - if (rc) { - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu); - cb->finish = 1; - hws_state = HWS_STOPPING; - } else { - hws_flush_all = 1; - /* Add work to queue to read pending samples.*/ - queue_work_on(cpu, hws_wq, &cb->worker); - } - } - } - mutex_unlock(&hws_sem); - - if (hws_wq) - flush_workqueue(hws_wq); - - return rc; -} - -/** - * hwsampler_activate() - activate/resume hardware sampling which was deactivated - * @cpu: specifies the CPU to be set active. - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_activate(unsigned int cpu) -{ - /* - * Re-activate hw sampling. This should be called in pair with - * hwsampler_deactivate(). - */ - int rc; - struct hws_cpu_buffer *cb; - - rc = 0; - mutex_lock(&hws_sem); - - cb = &per_cpu(sampler_cpu_buffer, cpu); - if (hws_state == HWS_STARTED) { - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (!cb->qsi.cs) { - hws_flush_all = 0; - rc = smp_ctl_ssctl_enable_activate(cpu, interval); - if (rc) { - printk(KERN_ERR - "CPU %d, CPUMF activate sampling failed.\n", - cpu); - } - } - } - - mutex_unlock(&hws_sem); - - return rc; -} - -static int check_qsi_on_setup(void) -{ - int rc; - unsigned int cpu; - struct hws_cpu_buffer *cb; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (rc) - return -EOPNOTSUPP; - - if (!cb->qsi.as) { - printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n"); - return -EINVAL; - } - - if (cb->qsi.es) { - printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n"); - rc = smp_ctl_ssctl_stop(cpu); - if (rc) - return -EINVAL; - - printk(KERN_INFO - "CPU %d, CPUMF Sampling stopped now.\n", cpu); - } - } - return 0; -} - -static int check_qsi_on_start(void) -{ - unsigned int cpu; - int rc; - struct hws_cpu_buffer *cb; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - - if (!cb->qsi.as) - return -EINVAL; - - if (cb->qsi.es) - return -EINVAL; - - if (cb->qsi.cs) - return -EINVAL; - } - return 0; -} - -static void worker_on_start(unsigned int cpu) -{ - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - cb->worker_entry = cb->first_sdbt; -} - -static int worker_check_error(unsigned int cpu, int ext_params) -{ - int rc; - unsigned long *sdbt; - struct hws_cpu_buffer *cb; - - rc = 0; - cb = &per_cpu(sampler_cpu_buffer, cpu); - sdbt = (unsigned long *) cb->worker_entry; - - if (!sdbt || !*sdbt) - return -EINVAL; - - if (ext_params & CPU_MF_INT_SF_PRA) - cb->req_alert++; - - if (ext_params & CPU_MF_INT_SF_LSDA) - cb->loss_of_sample_data++; - - if (ext_params & CPU_MF_INT_SF_IAE) { - cb->invalid_entry_address++; - rc = -EINVAL; - } - - if (ext_params & CPU_MF_INT_SF_ISE) { - cb->incorrect_sdbt_entry++; - rc = -EINVAL; - } - - if (ext_params & CPU_MF_INT_SF_SACA) { - cb->sample_auth_change_alert++; - rc = -EINVAL; - } - - return rc; -} - -static void worker_on_finish(unsigned int cpu) -{ - int rc, i; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (cb->finish) { - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (cb->qsi.es) { - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n", - cpu); - rc = smp_ctl_ssctl_stop(cpu); - if (rc) - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Deactivation failed.\n", - cpu); - - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (!cb->finish) { - cb->finish = 1; - queue_work_on(i, hws_wq, - &cb->worker); - } - } - } - } -} - -static void worker_on_interrupt(unsigned int cpu) -{ - unsigned long *sdbt; - unsigned char done; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - sdbt = (unsigned long *) cb->worker_entry; - - done = 0; - /* do not proceed if stop was entered, - * forget the buffers not yet processed */ - while (!done && !cb->stop_mode) { - unsigned long *trailer; - struct hws_trailer_entry *te; - unsigned long *dear = 0; - - trailer = trailer_entry_ptr(*sdbt); - /* leave loop if no more work to do */ - if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) { - done = 1; - if (!hws_flush_all) - continue; - } - - te = (struct hws_trailer_entry *)trailer; - cb->sample_overflow += te->overflow; - - add_samples_to_oprofile(cpu, sdbt, dear); - - /* reset trailer */ - xchg((unsigned char *) te, 0x40); - - /* advance to next sdb slot in current sdbt */ - sdbt++; - /* in case link bit is set use address w/o link bit */ - if (is_link_entry(sdbt)) - sdbt = get_next_sdbt(sdbt); - - cb->worker_entry = (unsigned long)sdbt; - } -} - -static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, - unsigned long *dear) -{ - struct hws_basic_entry *sample_data_ptr; - unsigned long *trailer; - - trailer = trailer_entry_ptr(*sdbt); - if (dear) { - if (dear > trailer) - return; - trailer = dear; - } - - sample_data_ptr = (struct hws_basic_entry *)(*sdbt); - - while ((unsigned long *)sample_data_ptr < trailer) { - struct pt_regs *regs = NULL; - struct task_struct *tsk = NULL; - - /* - * Check sampling mode, 1 indicates basic (=customer) sampling - * mode. - */ - if (sample_data_ptr->def != 1) { - /* sample slot is not yet written */ - break; - } else { - /* make sure we don't use it twice, - * the next time the sampler will set it again */ - sample_data_ptr->def = 0; - } - - /* Get pt_regs. */ - if (sample_data_ptr->P == 1) { - /* userspace sample */ - unsigned int pid = sample_data_ptr->prim_asn; - if (!counter_config.user) - goto skip_sample; - rcu_read_lock(); - tsk = pid_task(find_vpid(pid), PIDTYPE_PID); - if (tsk) - regs = task_pt_regs(tsk); - rcu_read_unlock(); - } else { - /* kernelspace sample */ - if (!counter_config.kernel) - goto skip_sample; - regs = task_pt_regs(current); - } - - mutex_lock(&hws_sem); - oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, - !sample_data_ptr->P, tsk); - mutex_unlock(&hws_sem); - skip_sample: - sample_data_ptr++; - } -} - -static void worker(struct work_struct *work) -{ - unsigned int cpu; - int ext_params; - struct hws_cpu_buffer *cb; - - cb = container_of(work, struct hws_cpu_buffer, worker); - cpu = smp_processor_id(); - ext_params = atomic_xchg(&cb->ext_params, 0); - - if (!cb->worker_entry) - worker_on_start(cpu); - - if (worker_check_error(cpu, ext_params)) - return; - - if (!cb->finish) - worker_on_interrupt(cpu); - - if (cb->finish) - worker_on_finish(cpu); -} - -/** - * hwsampler_allocate() - allocate memory for the hardware sampler - * @sdbt: number of SDBTs per online CPU (must be > 0) - * @sdb: number of SDBs per SDBT (minimum 1, maximum 511) - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_allocate(unsigned long sdbt, unsigned long sdb) -{ - int cpu, rc; - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state != HWS_DEALLOCATED) - goto allocate_exit; - - if (sdbt < 1) - goto allocate_exit; - - if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB) - goto allocate_exit; - - num_sdbt = sdbt; - num_sdb = sdb; - - oom_killer_was_active = 0; - register_oom_notifier(&hws_oom_notifier); - - for_each_online_cpu(cpu) { - if (allocate_sdbt(cpu)) { - unregister_oom_notifier(&hws_oom_notifier); - goto allocate_error; - } - } - unregister_oom_notifier(&hws_oom_notifier); - if (oom_killer_was_active) - goto allocate_error; - - hws_state = HWS_STOPPED; - rc = 0; - -allocate_exit: - mutex_unlock(&hws_sem); - return rc; - -allocate_error: - rc = -ENOMEM; - printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n"); - goto allocate_exit; -} - -/** - * hwsampler_deallocate() - deallocate hardware sampler memory - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_deallocate(void) -{ - int rc; - - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state != HWS_STOPPED) - goto deallocate_exit; - - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); - hws_alert = 0; - deallocate_sdbt(); - - hws_state = HWS_DEALLOCATED; - rc = 0; - -deallocate_exit: - mutex_unlock(&hws_sem); - - return rc; -} - -unsigned long hwsampler_query_min_interval(void) -{ - return min_sampler_rate; -} - -unsigned long hwsampler_query_max_interval(void) -{ - return max_sampler_rate; -} - -unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) -{ - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - return cb->sample_overflow; -} - -int hwsampler_setup(void) -{ - int rc; - int cpu; - struct hws_cpu_buffer *cb; - - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state) - goto setup_exit; - - hws_state = HWS_INIT; - - init_all_cpu_buffers(); - - rc = check_hardware_prerequisites(); - if (rc) - goto setup_exit; - - rc = check_qsi_on_setup(); - if (rc) - goto setup_exit; - - rc = -EINVAL; - hws_wq = create_workqueue("hwsampler"); - if (!hws_wq) - goto setup_exit; - - register_cpu_notifier(&hws_cpu_notifier); - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - INIT_WORK(&cb->worker, worker); - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (min_sampler_rate != cb->qsi.min_sampl_rate) { - if (min_sampler_rate) { - printk(KERN_WARNING - "hwsampler: different min sampler rate values.\n"); - if (min_sampler_rate < cb->qsi.min_sampl_rate) - min_sampler_rate = - cb->qsi.min_sampl_rate; - } else - min_sampler_rate = cb->qsi.min_sampl_rate; - } - if (max_sampler_rate != cb->qsi.max_sampl_rate) { - if (max_sampler_rate) { - printk(KERN_WARNING - "hwsampler: different max sampler rate values.\n"); - if (max_sampler_rate > cb->qsi.max_sampl_rate) - max_sampler_rate = - cb->qsi.max_sampl_rate; - } else - max_sampler_rate = cb->qsi.max_sampl_rate; - } - } - register_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler); - - hws_state = HWS_DEALLOCATED; - rc = 0; - -setup_exit: - mutex_unlock(&hws_sem); - return rc; -} - -int hwsampler_shutdown(void) -{ - int rc; - - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) { - mutex_unlock(&hws_sem); - - if (hws_wq) - flush_workqueue(hws_wq); - - mutex_lock(&hws_sem); - - if (hws_state == HWS_STOPPED) { - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); - hws_alert = 0; - deallocate_sdbt(); - } - if (hws_wq) { - destroy_workqueue(hws_wq); - hws_wq = NULL; - } - - unregister_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler); - hws_state = HWS_INIT; - rc = 0; - } - mutex_unlock(&hws_sem); - - unregister_cpu_notifier(&hws_cpu_notifier); - - return rc; -} - -/** - * hwsampler_start_all() - start hardware sampling on all online CPUs - * @rate: specifies the used interval when samples are taken - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_start_all(unsigned long rate) -{ - int rc, cpu; - - mutex_lock(&hws_sem); - - hws_oom = 0; - - rc = -EINVAL; - if (hws_state != HWS_STOPPED) - goto start_all_exit; - - interval = rate; - - /* fail if rate is not valid */ - if (interval < min_sampler_rate || interval > max_sampler_rate) - goto start_all_exit; - - rc = check_qsi_on_start(); - if (rc) - goto start_all_exit; - - prepare_cpu_buffers(); - - for_each_online_cpu(cpu) { - rc = start_sampling(cpu); - if (rc) - break; - } - if (rc) { - for_each_online_cpu(cpu) { - stop_sampling(cpu); - } - goto start_all_exit; - } - hws_state = HWS_STARTED; - rc = 0; - -start_all_exit: - mutex_unlock(&hws_sem); - - if (rc) - return rc; - - register_oom_notifier(&hws_oom_notifier); - hws_oom = 1; - hws_flush_all = 0; - /* now let them in, 1407 CPUMF external interrupts */ - hws_alert = 1; - irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); - - return 0; -} - -/** - * hwsampler_stop_all() - stop hardware sampling on all online CPUs - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_stop_all(void) -{ - int tmp_rc, rc, cpu; - struct hws_cpu_buffer *cb; - - mutex_lock(&hws_sem); - - rc = 0; - if (hws_state == HWS_INIT) { - mutex_unlock(&hws_sem); - return 0; - } - hws_state = HWS_STOPPING; - mutex_unlock(&hws_sem); - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - cb->stop_mode = 1; - tmp_rc = stop_sampling(cpu); - if (tmp_rc) - rc = tmp_rc; - } - - if (hws_wq) - flush_workqueue(hws_wq); - - mutex_lock(&hws_sem); - if (hws_oom) { - unregister_oom_notifier(&hws_oom_notifier); - hws_oom = 0; - } - hws_state = HWS_STOPPED; - mutex_unlock(&hws_sem); - - return rc; -} diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h deleted file mode 100644 index a483d06f2fa7..000000000000 --- a/arch/s390/oprofile/hwsampler.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * CPUMF HW sampler functions and internal structures - * - * Copyright IBM Corp. 2010 - * Author(s): Heinz Graalfs <graalfs@de.ibm.com> - */ - -#ifndef HWSAMPLER_H_ -#define HWSAMPLER_H_ - -#include <linux/workqueue.h> -#include <asm/cpu_mf.h> - -struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */ -{ /* bytes 0 - 7 Bit(s) */ - unsigned int s:1; /* 0: maximum buffer indicator */ - unsigned int h:1; /* 1: part. level reserved for VM use*/ - unsigned long b2_53:52; /* 2-53: zeros */ - unsigned int es:1; /* 54: sampling enable control */ - unsigned int b55_61:7; /* 55-61: - zeros */ - unsigned int cs:1; /* 62: sampling activation control */ - unsigned int b63:1; /* 63: zero */ - unsigned long interval; /* 8-15: sampling interval */ - unsigned long tear; /* 16-23: TEAR contents */ - unsigned long dear; /* 24-31: DEAR contents */ - /* 32-63: */ - unsigned long rsvrd1; /* reserved */ - unsigned long rsvrd2; /* reserved */ - unsigned long rsvrd3; /* reserved */ - unsigned long rsvrd4; /* reserved */ -}; - -struct hws_cpu_buffer { - unsigned long first_sdbt; /* @ of 1st SDB-Table for this CP*/ - unsigned long worker_entry; - unsigned long sample_overflow; /* taken from SDB ... */ - struct hws_qsi_info_block qsi; - struct hws_ssctl_request_block ssctl; - struct work_struct worker; - atomic_t ext_params; - unsigned long req_alert; - unsigned long loss_of_sample_data; - unsigned long invalid_entry_address; - unsigned long incorrect_sdbt_entry; - unsigned long sample_auth_change_alert; - unsigned int finish:1; - unsigned int oom:1; - unsigned int stop_mode:1; -}; - -int hwsampler_setup(void); -int hwsampler_shutdown(void); -int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); -int hwsampler_deallocate(void); -unsigned long hwsampler_query_min_interval(void); -unsigned long hwsampler_query_max_interval(void); -int hwsampler_start_all(unsigned long interval); -int hwsampler_stop_all(void); -int hwsampler_deactivate(unsigned int cpu); -int hwsampler_activate(unsigned int cpu); -unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu); - -#endif /*HWSAMPLER_H_*/ diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 791935a65800..16f4c3960b87 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -10,488 +10,8 @@ */ #include <linux/oprofile.h> -#include <linux/perf_event.h> #include <linux/init.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/module.h> #include <asm/processor.h> -#include <asm/perf_event.h> - -#include "../../../drivers/oprofile/oprof.h" - -#include "hwsampler.h" -#include "op_counter.h" - -#define DEFAULT_INTERVAL 4127518 - -#define DEFAULT_SDBT_BLOCKS 1 -#define DEFAULT_SDB_BLOCKS 511 - -static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; -static unsigned long oprofile_min_interval; -static unsigned long oprofile_max_interval; - -static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; -static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; - -static int hwsampler_enabled; -static int hwsampler_running; /* start_mutex must be held to change */ -static int hwsampler_available; - -static struct oprofile_operations timer_ops; - -struct op_counter_config counter_config; - -enum __force_cpu_type { - reserved = 0, /* do not force */ - timer, -}; -static int force_cpu_type; - -static int set_cpu_type(const char *str, struct kernel_param *kp) -{ - if (!strcmp(str, "timer")) { - force_cpu_type = timer; - printk(KERN_INFO "oprofile: forcing timer to be returned " - "as cpu type\n"); - } else { - force_cpu_type = 0; - } - - return 0; -} -module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); -MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" - "(report cpu_type \"timer\""); - -static int __oprofile_hwsampler_start(void) -{ - int retval; - - retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); - if (retval) - return retval; - - retval = hwsampler_start_all(oprofile_hw_interval); - if (retval) - hwsampler_deallocate(); - - return retval; -} - -static int oprofile_hwsampler_start(void) -{ - int retval; - - hwsampler_running = hwsampler_enabled; - - if (!hwsampler_running) - return timer_ops.start(); - - retval = perf_reserve_sampling(); - if (retval) - return retval; - - retval = __oprofile_hwsampler_start(); - if (retval) - perf_release_sampling(); - - return retval; -} - -static void oprofile_hwsampler_stop(void) -{ - if (!hwsampler_running) { - timer_ops.stop(); - return; - } - - hwsampler_stop_all(); - hwsampler_deallocate(); - perf_release_sampling(); - return; -} - -/* - * File ops used for: - * /dev/oprofile/0/enabled - * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) - */ - -static ssize_t hwsampler_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); -} - -static ssize_t hwsampler_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - if (oprofile_started) - /* - * save to do without locking as we set - * hwsampler_running in start() when start_mutex is - * held - */ - return -EBUSY; - - hwsampler_enabled = val; - - return count; -} - -static const struct file_operations hwsampler_fops = { - .read = hwsampler_read, - .write = hwsampler_write, -}; - -/* - * File ops used for: - * /dev/oprofile/0/count - * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) - * - * Make sure that the value is within the hardware range. - */ - -static ssize_t hw_interval_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, - count, offset); -} - -static ssize_t hw_interval_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - if (val < oprofile_min_interval) - oprofile_hw_interval = oprofile_min_interval; - else if (val > oprofile_max_interval) - oprofile_hw_interval = oprofile_max_interval; - else - oprofile_hw_interval = val; - - return count; -} - -static const struct file_operations hw_interval_fops = { - .read = hw_interval_read, - .write = hw_interval_write, -}; - -/* - * File ops used for: - * /dev/oprofile/0/event - * Only a single event with number 0 is supported with this counter. - * - * /dev/oprofile/0/unit_mask - * This is a dummy file needed by the user space tools. - * No value other than 0 is accepted or returned. - */ - -static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(0, buf, count, offset); -} - -static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - if (val != 0) - return -EINVAL; - return count; -} - -static const struct file_operations zero_fops = { - .read = hwsampler_zero_read, - .write = hwsampler_zero_write, -}; - -/* /dev/oprofile/0/kernel file ops. */ - -static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(counter_config.kernel, - buf, count, offset); -} - -static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - counter_config.kernel = val; - - return count; -} - -static const struct file_operations kernel_fops = { - .read = hwsampler_kernel_read, - .write = hwsampler_kernel_write, -}; - -/* /dev/oprofile/0/user file ops. */ - -static ssize_t hwsampler_user_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(counter_config.user, - buf, count, offset); -} - -static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - counter_config.user = val; - - return count; -} - -static const struct file_operations user_fops = { - .read = hwsampler_user_read, - .write = hwsampler_user_write, -}; - - -/* - * File ops used for: /dev/oprofile/timer/enabled - * The value always has to be the inverted value of hwsampler_enabled. So - * no separate variable is created. That way we do not need locking. - */ - -static ssize_t timer_enabled_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); -} - -static ssize_t timer_enabled_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - /* Timer cannot be disabled without having hardware sampling. */ - if (val == 0 && !hwsampler_available) - return -EINVAL; - - if (oprofile_started) - /* - * save to do without locking as we set - * hwsampler_running in start() when start_mutex is - * held - */ - return -EBUSY; - - hwsampler_enabled = !val; - - return count; -} - -static const struct file_operations timer_enabled_fops = { - .read = timer_enabled_read, - .write = timer_enabled_write, -}; - - -static int oprofile_create_hwsampling_files(struct dentry *root) -{ - struct dentry *dir; - - dir = oprofilefs_mkdir(root, "timer"); - if (!dir) - return -EINVAL; - - oprofilefs_create_file(dir, "enabled", &timer_enabled_fops); - - if (!hwsampler_available) - return 0; - - /* reinitialize default values */ - hwsampler_enabled = 1; - counter_config.kernel = 1; - counter_config.user = 1; - - if (!force_cpu_type) { - /* - * Create the counter file system. A single virtual - * counter is created which can be used to - * enable/disable hardware sampling dynamically from - * user space. The user space will configure a single - * counter with a single event. The value of 'event' - * and 'unit_mask' are not evaluated by the kernel code - * and can only be set to 0. - */ - - dir = oprofilefs_mkdir(root, "0"); - if (!dir) - return -EINVAL; - - oprofilefs_create_file(dir, "enabled", &hwsampler_fops); - oprofilefs_create_file(dir, "event", &zero_fops); - oprofilefs_create_file(dir, "count", &hw_interval_fops); - oprofilefs_create_file(dir, "unit_mask", &zero_fops); - oprofilefs_create_file(dir, "kernel", &kernel_fops); - oprofilefs_create_file(dir, "user", &user_fops); - oprofilefs_create_ulong(dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); - - } else { - /* - * Hardware sampling can be used but the cpu_type is - * forced to timer in order to deal with legacy user - * space tools. The /dev/oprofile/hwsampling fs is - * provided in that case. - */ - dir = oprofilefs_mkdir(root, "hwsampling"); - if (!dir) - return -EINVAL; - - oprofilefs_create_file(dir, "hwsampler", - &hwsampler_fops); - oprofilefs_create_file(dir, "hw_interval", - &hw_interval_fops); - oprofilefs_create_ro_ulong(dir, "hw_min_interval", - &oprofile_min_interval); - oprofilefs_create_ro_ulong(dir, "hw_max_interval", - &oprofile_max_interval); - oprofilefs_create_ulong(dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); - } - return 0; -} - -static int oprofile_hwsampler_init(struct oprofile_operations *ops) -{ - /* - * Initialize the timer mode infrastructure as well in order - * to be able to switch back dynamically. oprofile_timer_init - * is not supposed to fail. - */ - if (oprofile_timer_init(ops)) - BUG(); - - memcpy(&timer_ops, ops, sizeof(timer_ops)); - ops->create_files = oprofile_create_hwsampling_files; - - /* - * If the user space tools do not support newer cpu types, - * the force_cpu_type module parameter - * can be used to always return \"timer\" as cpu type. - */ - if (force_cpu_type != timer) { - struct cpuid id; - - get_cpu_id (&id); - - switch (id.machine) { - case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; - case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; - case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; - case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break; - default: return -ENODEV; - } - } - - if (hwsampler_setup()) - return -ENODEV; - - /* - * Query the range for the sampling interval from the - * hardware. - */ - oprofile_min_interval = hwsampler_query_min_interval(); - if (oprofile_min_interval == 0) - return -ENODEV; - oprofile_max_interval = hwsampler_query_max_interval(); - if (oprofile_max_interval == 0) - return -ENODEV; - - /* The initial value should be sane */ - if (oprofile_hw_interval < oprofile_min_interval) - oprofile_hw_interval = oprofile_min_interval; - if (oprofile_hw_interval > oprofile_max_interval) - oprofile_hw_interval = oprofile_max_interval; - - printk(KERN_INFO "oprofile: System z hardware sampling " - "facility found.\n"); - - ops->start = oprofile_hwsampler_start; - ops->stop = oprofile_hwsampler_stop; - - return 0; -} - -static void oprofile_hwsampler_exit(void) -{ - hwsampler_shutdown(); -} static int __s390_backtrace(void *data, unsigned long address) { @@ -514,18 +34,9 @@ static void s390_backtrace(struct pt_regs *regs, unsigned int depth) int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; - - /* - * -ENODEV is not reported to the caller. The module itself - * will use the timer mode sampling as fallback and this is - * always available. - */ - hwsampler_available = oprofile_hwsampler_init(ops) == 0; - return 0; } void oprofile_arch_exit(void) { - oprofile_hwsampler_exit(); } diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h deleted file mode 100644 index 61b2531eef17..000000000000 --- a/arch/s390/oprofile/op_counter.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright IBM Corp. 2011 - * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) - * - * @remark Copyright 2011 OProfile authors - */ - -#ifndef OP_COUNTER_H -#define OP_COUNTER_H - -struct op_counter_config { - /* `enabled' maps to the hwsampler_file variable. */ - /* `count' maps to the oprofile_hw_interval variable. */ - /* `event' and `unit_mask' are unused. */ - unsigned long kernel; - unsigned long user; -}; - -extern struct op_counter_config counter_config; - -#endif /* OP_COUNTER_H */ diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 1ea8c07eab84..070f1ae5cfad 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -226,7 +226,8 @@ static unsigned long __dma_alloc_iommu(struct device *dev, boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, - start, size, 0, boundary_size, 0); + start, size, zdev->start_dma >> PAGE_SHIFT, + boundary_size, 0); } static unsigned long dma_alloc_iommu(struct device *dev, int size) @@ -469,6 +470,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) * Also set zdev->end_dma to the actual end address of the usable * range, instead of the theoretical maximum as reported by hardware. */ + zdev->start_dma = PAGE_ALIGN(zdev->start_dma); zdev->iommu_size = min3((u64) high_memory, ZPCI_TABLE_SIZE_RT - zdev->start_dma, zdev->end_dma - zdev->start_dma + 1); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index fb2a9a560fdc..c2b27ad8e94d 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -145,8 +145,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } - if (pdev) - pci_dev_put(pdev); + pci_dev_put(pdev); } void zpci_event_availability(void *data) diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 10ca15dcab11..fa8d7d4b9751 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -99,7 +99,7 @@ void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) } /* PCI Load */ -static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status) { register u64 __req asm("2") = req; register u64 __offset asm("3") = offset; @@ -116,6 +116,16 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) : "d" (__offset) : "cc"); *status = __req >> 24 & 0xff; + *data = __data; + return cc; +} + +static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +{ + u64 __data; + int cc; + + cc = ____pcilg(&__data, req, offset, status); if (!cc) *data = __data; |