diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-14 04:07:17 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-14 04:07:17 +0200 |
commit | 85a0b791bc17f7a49280b33e2905d109c062a47b (patch) | |
tree | 3b5ef2f586bcf07c5702b6b782b63cf50351f13b | |
parent | Merge branch 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/ker... (diff) | |
parent | s390/dasd: fix hanging offline processing due to canceled worker (diff) | |
download | linux-85a0b791bc17f7a49280b33e2905d109c062a47b.tar.xz linux-85a0b791bc17f7a49280b33e2905d109c062a47b.zip |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Heiko Carstens:
"Since Martin is on vacation you get the s390 pull request from me:
- Host large page support for KVM guests. As the patches have large
impact on arch/s390/mm/ this series goes out via both the KVM and
the s390 tree.
- Add an option for no compression to the "Kernel compression mode"
menu, this will come in handy with the rework of the early boot
code.
- A large rework of the early boot code that will make life easier
for KASAN and KASLR. With the rework the bootable uncompressed
image is not generated anymore, only the bzImage is available. For
debuggung purposes the new "no compression" option is used.
- Re-enable the gcc plugins as the issue with the latent entropy
plugin is solved with the early boot code rework.
- More spectre relates changes:
+ Detect the etoken facility and remove expolines automatically.
+ Add expolines to a few more indirect branches.
- A rewrite of the common I/O layer trace points to make them
consumable by 'perf stat'.
- Add support for format-3 PCI function measurement blocks.
- Changes for the zcrypt driver:
+ Add attributes to indicate the load of cards and queues.
+ Restructure some code for the upcoming AP device support in KVM.
- Build flags improvements in various Makefiles.
- A few fixes for the kdump support.
- A couple of patches for gcc 8 compile warning cleanup.
- Cleanup s390 specific proc handlers.
- Add s390 support to the restartable sequence self tests.
- Some PTR_RET vs PTR_ERR_OR_ZERO cleanup.
- Lots of bug fixes"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (107 commits)
s390/dasd: fix hanging offline processing due to canceled worker
s390/dasd: fix panic for failed online processing
s390/mm: fix addressing exception after suspend/resume
rseq/selftests: add s390 support
s390: fix br_r1_trampoline for machines without exrl
s390/lib: use expoline for all bcr instructions
s390/numa: move initial setup of node_to_cpumask_map
s390/kdump: Fix elfcorehdr size calculation
s390/cpum_sf: save TOD clock base in SDBs for time conversion
KVM: s390: Add huge page enablement control
s390/mm: Add huge page gmap linking support
s390/mm: hugetlb pages within a gmap can not be freed
KVM: s390: Add skey emulation fault handling
s390/mm: Add huge pmd storage key handling
s390/mm: Clear skeys for newly mapped huge guest pmds
s390/mm: Clear huge page storage keys on enable_skey
s390/mm: Add huge page dirty sync support
s390/mm: Add gmap pmd invalidation and clearing
s390/mm: Add gmap pmd notification bit setting
s390/mm: Add gmap pmd linking
...
107 files changed, 2372 insertions, 1094 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index d10944e619d3..cb8db4f9d097 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4391,6 +4391,22 @@ all such vmexits. Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits. +7.14 KVM_CAP_S390_HPAGE_1M + +Architectures: s390 +Parameters: none +Returns: 0 on success, -EINVAL if hpage module parameter was not set + or cmma is enabled + +With this capability the KVM support for memory backing with 1m pages +through hugetlbfs can be enabled for a VM. After the capability is +enabled, cmma can't be enabled anymore and pfmfi and the storage key +interpretation are disabled. If cmma has already been enabled or the +hpage module parameter is not set to 1, -EINVAL is returned. + +While it is generally possible to create a huge page backed VM without +this capability, the VM will not be able to run. + 8. Other capabilities. ---------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 629e08703c82..0a2342770dee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12408,7 +12408,6 @@ F: drivers/pci/hotplug/s390_pci_hpc.c S390 VFIO-CCW DRIVER M: Cornelia Huck <cohuck@redhat.com> -M: Dong Jia Shi <bjsdjshi@linux.ibm.com> M: Halil Pasic <pasic@linux.ibm.com> L: linux-s390@vger.kernel.org L: kvm@vger.kernel.org diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4fe5b2affa23..515240576930 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -145,6 +145,7 @@ config S390 select HAVE_KERNEL_LZ4 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO + select HAVE_KERNEL_UNCOMPRESSED select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KRETPROBES diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 68a690442be0..eee6703093c3 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -14,8 +14,18 @@ LD_BFD := elf64-s390 LDFLAGS := -m elf64_s390 KBUILD_AFLAGS_MODULE += -fPIC KBUILD_CFLAGS_MODULE += -fPIC -KBUILD_CFLAGS += -m64 KBUILD_AFLAGS += -m64 +KBUILD_CFLAGS += -m64 +aflags_dwarf := -Wa,-gdwarf-2 +KBUILD_AFLAGS_DECOMPRESSOR := -m64 -D__ASSEMBLY__ +KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) +KBUILD_CFLAGS_DECOMPRESSOR := -m64 -O2 +KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY +KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float +KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables +KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding) +KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) +KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) UTS_MACHINE := s390x STACK_SIZE := 16384 CHECKFLAGS += -D__s390__ -D__s390x__ @@ -52,18 +62,14 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -# old style option for packed stacks -ifeq ($(call cc-option-yn,-mkernel-backchain),y) -cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK -aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -endif - -# new style option for packed stacks ifeq ($(call cc-option-yn,-mpacked-stack),y) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK endif +KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) +KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) + ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y) cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE) ifneq ($(call cc-option-yn,-mstack-size=8192),y) @@ -71,8 +77,11 @@ cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD) endif endif -ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) -cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack +ifdef CONFIG_WARN_DYNAMIC_STACK + ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) + KBUILD_CFLAGS += -mwarn-dynamicstack + KBUILD_CFLAGS_DECOMPRESSOR += -mwarn-dynamicstack + endif endif ifdef CONFIG_EXPOLINE @@ -82,6 +91,7 @@ ifdef CONFIG_EXPOLINE CC_FLAGS_EXPOLINE += -mindirect-branch-table export CC_FLAGS_EXPOLINE cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE + aflags-y += -DCC_USING_EXPOLINE endif endif @@ -102,11 +112,12 @@ KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi) KBUILD_AFLAGS += $(aflags-y) $(cfi) +export KBUILD_AFLAGS_DECOMPRESSOR +export KBUILD_CFLAGS_DECOMPRESSOR OBJCOPYFLAGS := -O binary -head-y := arch/s390/kernel/head.o -head-y += arch/s390/kernel/head64.o +head-y := arch/s390/kernel/head64.o # See arch/s390/Kbuild for content of core part of the kernel core-y += arch/s390/ @@ -121,7 +132,7 @@ boot := arch/s390/boot syscalls := arch/s390/kernel/syscalls tools := arch/s390/tools -all: image bzImage +all: bzImage #KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg... KBUILD_IMAGE := $(boot)/bzImage @@ -129,7 +140,7 @@ KBUILD_IMAGE := $(boot)/bzImage install: vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ -image bzImage: vmlinux +bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ zfcpdump: @@ -152,8 +163,7 @@ archprepare: # Don't use tabs in echo arguments define archhelp - echo '* image - Kernel image for IPL ($(boot)/image)' - echo '* bzImage - Compressed kernel image for IPL ($(boot)/bzImage)' + echo '* bzImage - Kernel image for IPL ($(boot)/bzImage)' echo ' install - Install kernel using' echo ' (your) ~/bin/$(INSTALLKERNEL) or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ee6a9c387c87..9bf8489df6e6 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -206,35 +206,28 @@ static int appldata_timer_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int len; - char buf[2]; + int timer_active = appldata_timer_active; + int zero = 0; + int one = 1; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &timer_active, + .maxlen = sizeof(int), + .extra1 = &zero, + .extra2 = &one, + }; + + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; - if (!*lenp || *ppos) { - *lenp = 0; - return 0; - } - if (!write) { - strncpy(buf, appldata_timer_active ? "1\n" : "0\n", - ARRAY_SIZE(buf)); - len = strnlen(buf, ARRAY_SIZE(buf)); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; spin_lock(&appldata_timer_lock); - if (buf[0] == '1') + if (timer_active) __appldata_vtimer_setup(APPLDATA_ADD_TIMER); - else if (buf[0] == '0') + else __appldata_vtimer_setup(APPLDATA_DEL_TIMER); spin_unlock(&appldata_timer_lock); -out: - *lenp = len; - *ppos += len; return 0; } @@ -248,37 +241,24 @@ static int appldata_interval_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int len; - int interval; - char buf[16]; + int interval = appldata_interval; + int one = 1; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &interval, + .maxlen = sizeof(int), + .extra1 = &one, + }; - if (!*lenp || *ppos) { - *lenp = 0; - return 0; - } - if (!write) { - len = sprintf(buf, "%i\n", appldata_interval); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; - interval = 0; - sscanf(buf, "%i", &interval); - if (interval <= 0) - return -EINVAL; + rc = proc_dointvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; spin_lock(&appldata_timer_lock); appldata_interval = interval; __appldata_vtimer_setup(APPLDATA_MOD_TIMER); spin_unlock(&appldata_timer_lock); -out: - *lenp = len; - *ppos += len; return 0; } @@ -293,10 +273,17 @@ appldata_generic_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; - unsigned int len; - int rc, found; - char buf[2]; struct list_head *lh; + int rc, found; + int active; + int zero = 0; + int one = 1; + struct ctl_table ctl_entry = { + .data = &active, + .maxlen = sizeof(int), + .extra1 = &zero, + .extra2 = &one, + }; found = 0; mutex_lock(&appldata_ops_mutex); @@ -317,31 +304,15 @@ appldata_generic_handler(struct ctl_table *ctl, int write, } mutex_unlock(&appldata_ops_mutex); - if (!*lenp || *ppos) { - *lenp = 0; + active = ops->active; + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) { module_put(ops->owner); - return 0; - } - if (!write) { - strncpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf)); - len = strnlen(buf, ARRAY_SIZE(buf)); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) { - module_put(ops->owner); - return -EFAULT; - } - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? sizeof(buf) : len)) { - module_put(ops->owner); - return -EFAULT; + return rc; } mutex_lock(&appldata_ops_mutex); - if ((buf[0] == '1') && (ops->active == 0)) { + if (active && (ops->active == 0)) { // protect work queue callback if (!try_module_get(ops->owner)) { mutex_unlock(&appldata_ops_mutex); @@ -359,7 +330,7 @@ appldata_generic_handler(struct ctl_table *ctl, int write, module_put(ops->owner); } else ops->active = 1; - } else if ((buf[0] == '0') && (ops->active == 1)) { + } else if (!active && (ops->active == 1)) { ops->active = 0; rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, (unsigned long) ops->data, ops->size, @@ -370,9 +341,6 @@ appldata_generic_handler(struct ctl_table *ctl, int write, module_put(ops->owner); } mutex_unlock(&appldata_ops_mutex); -out: - *lenp = len; - *ppos += len; module_put(ops->owner); return 0; } diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index d1fa37fcce83..9e6668ee93de 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -3,19 +3,52 @@ # Makefile for the linux s390-specific parts of the memory manager. # -targets := image -targets += bzImage -subdir- := compressed +KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n -$(obj)/image: vmlinux FORCE - $(call if_changed,objcopy) +KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) +KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) + +# +# Use -march=z900 for als.c to be able to print an error +# message if the kernel is started on a machine which is too old +# +ifneq ($(CC_FLAGS_MARCH),-march=z900) +AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) +AFLAGS_head.o += -march=z900 +AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH) +AFLAGS_mem.o += -march=z900 +CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) +CFLAGS_als.o += -march=z900 +CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) +CFLAGS_sclp_early_core.o += -march=z900 +endif + +CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char + +obj-y := head.o als.o ebcdic.o sclp_early_core.o mem.o +targets := bzImage startup.a $(obj-y) +subdir- := compressed + +OBJECTS := $(addprefix $(obj)/,$(obj-y)) $(obj)/bzImage: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) -$(obj)/compressed/vmlinux: FORCE +$(obj)/compressed/vmlinux: $(obj)/startup.a FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ +quiet_cmd_ar = AR $@ + cmd_ar = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(filter $(OBJECTS), $^) + +$(obj)/startup.a: $(OBJECTS) FORCE + $(call if_changed,ar) + install: $(CONFIGURE) $(obj)/bzImage sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" + +chkbss := $(OBJECTS) +chkbss-target := $(obj)/startup.a +include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/kernel/als.c b/arch/s390/boot/als.c index d1892bf36cab..d592e0d90d9f 100644 --- a/arch/s390/kernel/als.c +++ b/arch/s390/boot/als.c @@ -3,12 +3,10 @@ * Copyright IBM Corp. 2016 */ #include <linux/kernel.h> -#include <linux/init.h> #include <asm/processor.h> #include <asm/facility.h> #include <asm/lowcore.h> #include <asm/sclp.h> -#include "entry.h" /* * The code within this file will be called very early. It may _not_ @@ -18,9 +16,9 @@ * For temporary objects the stack (16k) should be used. */ -static unsigned long als[] __initdata = { FACILITIES_ALS }; +static unsigned long als[] = { FACILITIES_ALS }; -static void __init u16_to_hex(char *str, u16 val) +static void u16_to_hex(char *str, u16 val) { int i, num; @@ -33,9 +31,9 @@ static void __init u16_to_hex(char *str, u16 val) *str = '\0'; } -static void __init print_machine_type(void) +static void print_machine_type(void) { - static char mach_str[80] __initdata = "Detected machine-type number: "; + static char mach_str[80] = "Detected machine-type number: "; char type_str[5]; struct cpuid id; @@ -46,7 +44,7 @@ static void __init print_machine_type(void) sclp_early_printk(mach_str); } -static void __init u16_to_decimal(char *str, u16 val) +static void u16_to_decimal(char *str, u16 val) { int div = 1; @@ -60,9 +58,9 @@ static void __init u16_to_decimal(char *str, u16 val) *str = '\0'; } -static void __init print_missing_facilities(void) +static void print_missing_facilities(void) { - static char als_str[80] __initdata = "Missing facilities: "; + static char als_str[80] = "Missing facilities: "; unsigned long val; char val_str[6]; int i, j, first; @@ -95,7 +93,7 @@ static void __init print_missing_facilities(void) sclp_early_printk("See Principles of Operations for facility bits\n"); } -static void __init facility_mismatch(void) +static void facility_mismatch(void) { sclp_early_printk("The Linux kernel requires more recent processor hardware\n"); print_machine_type(); @@ -103,7 +101,7 @@ static void __init facility_mismatch(void) disabled_wait(0x8badcccc); } -void __init verify_facilities(void) +void verify_facilities(void) { int i; diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore index 2088cc140629..45aeb4f08752 100644 --- a/arch/s390/boot/compressed/.gitignore +++ b/arch/s390/boot/compressed/.gitignore @@ -1,4 +1,5 @@ sizes.h vmlinux vmlinux.lds +vmlinux.scr.lds vmlinux.bin.full diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 5766f7b9b271..04609478d18b 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -6,39 +6,29 @@ # KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n +obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,head.o misc.o) piggy.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 -targets += misc.o piggy.o sizes.h head.o - -KBUILD_CFLAGS := -m64 -D__KERNEL__ -O2 -KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY -KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float -KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) -KBUILD_CFLAGS += $(call cc-option,-ffreestanding) +targets += vmlinux.scr.lds $(obj-y) $(if $(CONFIG_KERNEL_UNCOMPRESSED),,sizes.h) -GCOV_PROFILE := n -UBSAN_SANITIZE := n +KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) +KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) -OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o ebcdic.o als.o) -OBJECTS += $(objtree)/drivers/s390/char/sclp_early_core.o -OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o +OBJECTS := $(addprefix $(obj)/,$(obj-y)) LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) +$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) $(call if_changed,ld) -TRIM_HEAD_SIZE := 0x11000 - -sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - $(TRIM_HEAD_SIZE))/p' +# extract required uncompressed vmlinux symbols and adjust them to reflect offsets inside vmlinux.bin +sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - 0x100000)/p' quiet_cmd_sizes = GEN $@ cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@ -quiet_cmd_trim_head = TRIM $@ - cmd_trim_head = tail -c +$$(($(TRIM_HEAD_SIZE) + 1)) $< > $@ - $(obj)/sizes.h: vmlinux $(call if_changed,sizes) @@ -48,21 +38,18 @@ $(obj)/head.o: $(obj)/sizes.h CFLAGS_misc.o += -I$(objtree)/$(obj) $(obj)/misc.o: $(obj)/sizes.h -OBJCOPYFLAGS_vmlinux.bin.full := -R .comment -S -$(obj)/vmlinux.bin.full: vmlinux +OBJCOPYFLAGS_vmlinux.bin := -R .comment -S +$(obj)/vmlinux.bin: vmlinux $(call if_changed,objcopy) -$(obj)/vmlinux.bin: $(obj)/vmlinux.bin.full - $(call if_changed,trim_head) - vmlinux.bin.all-y := $(obj)/vmlinux.bin -suffix-$(CONFIG_KERNEL_GZIP) := gz -suffix-$(CONFIG_KERNEL_BZIP2) := bz2 -suffix-$(CONFIG_KERNEL_LZ4) := lz4 -suffix-$(CONFIG_KERNEL_LZMA) := lzma -suffix-$(CONFIG_KERNEL_LZO) := lzo -suffix-$(CONFIG_KERNEL_XZ) := xz +suffix-$(CONFIG_KERNEL_GZIP) := .gz +suffix-$(CONFIG_KERNEL_BZIP2) := .bz2 +suffix-$(CONFIG_KERNEL_LZ4) := .lz4 +suffix-$(CONFIG_KERNEL_LZMA) := .lzma +suffix-$(CONFIG_KERNEL_LZO) := .lzo +suffix-$(CONFIG_KERNEL_XZ) := .xz $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) $(call if_changed,gzip) @@ -78,5 +65,9 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) $(call if_changed,xzkern) LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) +$(obj)/piggy.o: $(obj)/vmlinux.scr.lds $(obj)/vmlinux.bin$(suffix-y) $(call if_changed,ld) + +chkbss := $(filter-out $(obj)/misc.o $(obj)/piggy.o,$(OBJECTS)) +chkbss-target := $(obj)/vmlinux.bin +include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S index 9f94eca0f467..df8dbbc17bcc 100644 --- a/arch/s390/boot/compressed/head.S +++ b/arch/s390/boot/compressed/head.S @@ -15,7 +15,7 @@ #include "sizes.h" __HEAD -ENTRY(startup_continue) +ENTRY(startup_decompressor) basr %r13,0 # get base .LPG1: # setup stack @@ -23,7 +23,7 @@ ENTRY(startup_continue) aghi %r15,-160 brasl %r14,decompress_kernel # Set up registers for memory mover. We move the decompressed image to - # 0x11000, where startup_continue of the decompressed image is supposed + # 0x100000, where startup_continue of the decompressed image is supposed # to be. lgr %r4,%r2 lg %r2,.Loffset-.LPG1(%r13) @@ -33,7 +33,7 @@ ENTRY(startup_continue) la %r1,0x200 mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13) # When the memory mover is done we pass control to - # arch/s390/kernel/head64.S:startup_continue which lives at 0x11000 in + # arch/s390/kernel/head64.S:startup_continue which lives at 0x100000 in # the decompressed image. lgr %r6,%r2 br %r1 @@ -47,6 +47,6 @@ mover_end: .Lstack: .quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) .Loffset: - .quad 0x11000 + .quad 0x100000 .Lmvsize: .quad SZ__bss_start diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 511b2cc9b91a..f66ad73c205b 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -71,43 +71,6 @@ static int puts(const char *s) return 0; } -void *memset(void *s, int c, size_t n) -{ - char *xs; - - xs = s; - while (n--) - *xs++ = c; - return s; -} - -void *memcpy(void *dest, const void *src, size_t n) -{ - const char *s = src; - char *d = dest; - - while (n--) - *d++ = *s++; - return dest; -} - -void *memmove(void *dest, const void *src, size_t n) -{ - const char *s = src; - char *d = dest; - - if (d <= s) { - while (n--) - *d++ = *s++; - } else { - d += n; - s += n; - while (n--) - *--d = *--s; - } - return dest; -} - static void error(char *x) { unsigned long long psw = 0x000a0000deadbeefULL; diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index d43c2db12d30..b16ac8b3c439 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -23,13 +23,10 @@ SECTIONS *(.text.*) _etext = . ; } - .rodata.compressed : { - *(.rodata.compressed) - } .rodata : { _rodata = . ; *(.rodata) /* read-only data */ - *(.rodata.*) + *(EXCLUDE_FILE (*piggy.o) .rodata.compressed) _erodata = . ; } .data : { @@ -38,6 +35,15 @@ SECTIONS *(.data.*) _edata = . ; } + startup_continue = 0x100000; +#ifdef CONFIG_KERNEL_UNCOMPRESSED + . = 0x100000; +#else + . = ALIGN(8); +#endif + .rodata.compressed : { + *(.rodata.compressed) + } . = ALIGN(256); .bss : { _bss = . ; @@ -54,5 +60,6 @@ SECTIONS *(.eh_frame) *(__ex_table) *(*__ksymtab*) + *(___kcrctab*) } } diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr.lds.S index 42a242597f34..ff01d18c9222 100644 --- a/arch/s390/boot/compressed/vmlinux.scr +++ b/arch/s390/boot/compressed/vmlinux.scr.lds.S @@ -2,10 +2,14 @@ SECTIONS { .rodata.compressed : { +#ifndef CONFIG_KERNEL_UNCOMPRESSED input_len = .; LONG(input_data_end - input_data) input_data = .; +#endif *(.data) +#ifndef CONFIG_KERNEL_UNCOMPRESSED output_len = . - 4; input_data_end = .; +#endif } } diff --git a/arch/s390/boot/ebcdic.c b/arch/s390/boot/ebcdic.c new file mode 100644 index 000000000000..7391e7d36086 --- /dev/null +++ b/arch/s390/boot/ebcdic.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../kernel/ebcdic.c" diff --git a/arch/s390/kernel/head.S b/arch/s390/boot/head.S index 5c42f16a54c4..f721913b73f1 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/boot/head.S @@ -272,14 +272,14 @@ iplstart: .org 0x10000 ENTRY(startup) j .Lep_startup_normal - .org 0x10008 + .org EP_OFFSET # # This is a list of s390 kernel entry points. At address 0x1000f the number of # valid entry points is stored. # # IMPORTANT: Do not change this table, it is s390 kernel ABI! # - .ascii "S390EP" + .ascii EP_STRING .byte 0x00,0x01 # # kdump startup-code at 0x10010, running in 64 bit absolute addressing mode @@ -310,10 +310,11 @@ ENTRY(startup_kdump) l %r15,.Lstack-.LPG0(%r13) ahi %r15,-STACK_FRAME_OVERHEAD brasl %r14,verify_facilities -# For uncompressed images, continue in -# arch/s390/kernel/head64.S. For compressed images, continue in -# arch/s390/boot/compressed/head.S. +#ifdef CONFIG_KERNEL_UNCOMPRESSED jg startup_continue +#else + jg startup_decompressor +#endif .Lstack: .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/boot/head_kdump.S index 174d6959bf5b..174d6959bf5b 100644 --- a/arch/s390/kernel/head_kdump.S +++ b/arch/s390/boot/head_kdump.S diff --git a/arch/s390/boot/mem.S b/arch/s390/boot/mem.S new file mode 100644 index 000000000000..b33463633f03 --- /dev/null +++ b/arch/s390/boot/mem.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../lib/mem.S" diff --git a/arch/s390/boot/sclp_early_core.c b/arch/s390/boot/sclp_early_core.c new file mode 100644 index 000000000000..5a19fd7020b5 --- /dev/null +++ b/arch/s390/boot/sclp_early_core.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../drivers/s390/char/sclp_early_core.c" diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index a2945b289a29..3452e18bb1ca 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -497,7 +497,7 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) } diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_RET(rc); + return PTR_ERR_OR_ZERO(rc); } static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) @@ -544,7 +544,7 @@ static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) return PTR_ERR(rc); diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_RET(rc); + return PTR_ERR_OR_ZERO(rc); } static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 06b513d192b9..c681329fdeec 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -36,7 +36,7 @@ struct hypfs_sb_info { kuid_t uid; /* uid used for files and dirs */ kgid_t gid; /* gid used for files and dirs */ struct dentry *update_file; /* file to trigger update */ - time_t last_update; /* last update time in secs since 1970 */ + time64_t last_update; /* last update, CLOCK_MONOTONIC time */ struct mutex lock; /* lock to protect update process */ }; @@ -52,7 +52,7 @@ static void hypfs_update_update(struct super_block *sb) struct hypfs_sb_info *sb_info = sb->s_fs_info; struct inode *inode = d_inode(sb_info->update_file); - sb_info->last_update = get_seconds(); + sb_info->last_update = ktime_get_seconds(); inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); } @@ -179,7 +179,7 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from) * to restart data collection in this case. */ mutex_lock(&fs_info->lock); - if (fs_info->last_update == get_seconds()) { + if (fs_info->last_update == ktime_get_seconds()) { rc = -EBUSY; goto out; } diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index c1bedb4c8de0..046e044a48d0 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -47,6 +47,50 @@ struct ap_queue_status { }; /** + * ap_intructions_available() - Test if AP instructions are available. + * + * Returns 0 if the AP instructions are installed. + */ +static inline int ap_instructions_available(void) +{ + register unsigned long reg0 asm ("0") = AP_MKQID(0, 0); + register unsigned long reg1 asm ("1") = -ENODEV; + register unsigned long reg2 asm ("2"); + + asm volatile( + " .long 0xb2af0000\n" /* PQAP(TAPQ) */ + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (reg1), "=d" (reg2) + : "d" (reg0) + : "cc"); + return reg1; +} + +/** + * ap_tapq(): Test adjunct processor queue. + * @qid: The AP queue number + * @info: Pointer to queue descriptor + * + * Returns AP queue status structure. + */ +static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) +{ + register unsigned long reg0 asm ("0") = qid; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2"); + + asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ + : "=d" (reg1), "=d" (reg2) + : "d" (reg0) + : "cc"); + if (info) + *info = reg2; + return reg1; +} + +/** * ap_test_queue(): Test adjunct processor queue. * @qid: The AP queue number * @tbit: Test facilities bit @@ -54,10 +98,57 @@ struct ap_queue_status { * * Returns AP queue status structure. */ -struct ap_queue_status ap_test_queue(ap_qid_t qid, - int tbit, - unsigned long *info); +static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, + int tbit, + unsigned long *info) +{ + if (tbit) + qid |= 1UL << 23; /* set T bit*/ + return ap_tapq(qid, info); +} +/** + * ap_pqap_rapq(): Reset adjunct processor queue. + * @qid: The AP queue number + * + * Returns AP queue status structure. + */ +static inline struct ap_queue_status ap_rapq(ap_qid_t qid) +{ + register unsigned long reg0 asm ("0") = qid | (1UL << 24); + register struct ap_queue_status reg1 asm ("1"); + + asm volatile( + ".long 0xb2af0000" /* PQAP(RAPQ) */ + : "=d" (reg1) + : "d" (reg0) + : "cc"); + return reg1; +} + +/** + * ap_pqap_zapq(): Reset and zeroize adjunct processor queue. + * @qid: The AP queue number + * + * Returns AP queue status structure. + */ +static inline struct ap_queue_status ap_zapq(ap_qid_t qid) +{ + register unsigned long reg0 asm ("0") = qid | (2UL << 24); + register struct ap_queue_status reg1 asm ("1"); + + asm volatile( + ".long 0xb2af0000" /* PQAP(ZAPQ) */ + : "=d" (reg1) + : "d" (reg0) + : "cc"); + return reg1; +} + +/** + * struct ap_config_info - convenience struct for AP crypto + * config info as returned by the ap_qci() function. + */ struct ap_config_info { unsigned int apsc : 1; /* S bit */ unsigned int apxa : 1; /* N bit */ @@ -74,50 +165,189 @@ struct ap_config_info { unsigned char _reserved4[16]; } __aligned(8); -/* - * ap_query_configuration(): Fetch cryptographic config info +/** + * ap_qci(): Get AP configuration data * - * Returns the ap configuration info fetched via PQAP(QCI). - * On success 0 is returned, on failure a negative errno - * is returned, e.g. if the PQAP(QCI) instruction is not - * available, the return value will be -EOPNOTSUPP. + * Returns 0 on success, or -EOPNOTSUPP. */ -int ap_query_configuration(struct ap_config_info *info); +static inline int ap_qci(struct ap_config_info *config) +{ + register unsigned long reg0 asm ("0") = 4UL << 24; + register unsigned long reg1 asm ("1") = -EOPNOTSUPP; + register struct ap_config_info *reg2 asm ("2") = config; + + asm volatile( + ".long 0xb2af0000\n" /* PQAP(QCI) */ + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (reg1) + : "d" (reg0), "d" (reg2) + : "cc", "memory"); + + return reg1; +} /* * struct ap_qirq_ctrl - convenient struct for easy invocation - * of the ap_queue_irq_ctrl() function. This struct is passed - * as GR1 parameter to the PQAP(AQIC) instruction. For details - * please see the AR documentation. + * of the ap_aqic() function. This struct is passed as GR1 + * parameter to the PQAP(AQIC) instruction. For details please + * see the AR documentation. */ struct ap_qirq_ctrl { unsigned int _res1 : 8; - unsigned int zone : 8; /* zone info */ - unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */ + unsigned int zone : 8; /* zone info */ + unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */ unsigned int _res2 : 4; - unsigned int gisc : 3; /* guest isc field */ + unsigned int gisc : 3; /* guest isc field */ unsigned int _res3 : 6; - unsigned int gf : 2; /* gisa format */ + unsigned int gf : 2; /* gisa format */ unsigned int _res4 : 1; - unsigned int gisa : 27; /* gisa origin */ + unsigned int gisa : 27; /* gisa origin */ unsigned int _res5 : 1; - unsigned int isc : 3; /* irq sub class */ + unsigned int isc : 3; /* irq sub class */ }; /** - * ap_queue_irq_ctrl(): Control interruption on a AP queue. + * ap_aqic(): Control interruption for a specific AP. * @qid: The AP queue number - * @qirqctrl: struct ap_qirq_ctrl, see above + * @qirqctrl: struct ap_qirq_ctrl (64 bit value) * @ind: The notification indicator byte * * Returns AP queue status. + */ +static inline struct ap_queue_status ap_aqic(ap_qid_t qid, + struct ap_qirq_ctrl qirqctrl, + void *ind) +{ + register unsigned long reg0 asm ("0") = qid | (3UL << 24); + register struct ap_qirq_ctrl reg1_in asm ("1") = qirqctrl; + register struct ap_queue_status reg1_out asm ("1"); + register void *reg2 asm ("2") = ind; + + asm volatile( + ".long 0xb2af0000" /* PQAP(AQIC) */ + : "=d" (reg1_out) + : "d" (reg0), "d" (reg1_in), "d" (reg2) + : "cc"); + return reg1_out; +} + +/* + * union ap_qact_ap_info - used together with the + * ap_aqic() function to provide a convenient way + * to handle the ap info needed by the qact function. + */ +union ap_qact_ap_info { + unsigned long val; + struct { + unsigned int : 3; + unsigned int mode : 3; + unsigned int : 26; + unsigned int cat : 8; + unsigned int : 8; + unsigned char ver[2]; + }; +}; + +/** + * ap_qact(): Query AP combatibility type. + * @qid: The AP queue number + * @apinfo: On input the info about the AP queue. On output the + * alternate AP queue info provided by the qact function + * in GR2 is stored in. * - * Control interruption on the given AP queue. - * Just a simple wrapper function for the low level PQAP(AQIC) - * instruction available for other kernel modules. + * Returns AP queue status. Check response_code field for failures. */ -struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid, - struct ap_qirq_ctrl qirqctrl, - void *ind); +static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, + union ap_qact_ap_info *apinfo) +{ + register unsigned long reg0 asm ("0") = qid | (5UL << 24) + | ((ifbit & 0x01) << 22); + register unsigned long reg1_in asm ("1") = apinfo->val; + register struct ap_queue_status reg1_out asm ("1"); + register unsigned long reg2 asm ("2"); + + asm volatile( + ".long 0xb2af0000" /* PQAP(QACT) */ + : "+d" (reg1_in), "=d" (reg1_out), "=d" (reg2) + : "d" (reg0) + : "cc"); + apinfo->val = reg2; + return reg1_out; +} + +/** + * ap_nqap(): Send message to adjunct processor queue. + * @qid: The AP queue number + * @psmid: The program supplied message identifier + * @msg: The message text + * @length: The message length + * + * Returns AP queue status structure. + * Condition code 1 on NQAP can't happen because the L bit is 1. + * Condition code 2 on NQAP also means the send is incomplete, + * because a segment boundary was reached. The NQAP is repeated. + */ +static inline struct ap_queue_status ap_nqap(ap_qid_t qid, + unsigned long long psmid, + void *msg, size_t length) +{ + register unsigned long reg0 asm ("0") = qid | 0x40000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = (unsigned long) msg; + register unsigned long reg3 asm ("3") = (unsigned long) length; + register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); + register unsigned long reg5 asm ("5") = psmid & 0xffffffff; + + asm volatile ( + "0: .long 0xb2ad0042\n" /* NQAP */ + " brc 2,0b" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) + : "d" (reg4), "d" (reg5) + : "cc", "memory"); + return reg1; +} + +/** + * ap_dqap(): Receive message from adjunct processor queue. + * @qid: The AP queue number + * @psmid: Pointer to program supplied message identifier + * @msg: The message text + * @length: The message length + * + * Returns AP queue status structure. + * Condition code 1 on DQAP means the receive has taken place + * but only partially. The response is incomplete, hence the + * DQAP is repeated. + * Condition code 2 on DQAP also means the receive is incomplete, + * this time because a segment boundary was reached. Again, the + * DQAP is repeated. + * Note that gpr2 is used by the DQAP instruction to keep track of + * any 'residual' length, in case the instruction gets interrupted. + * Hence it gets zeroed before the instruction. + */ +static inline struct ap_queue_status ap_dqap(ap_qid_t qid, + unsigned long long *psmid, + void *msg, size_t length) +{ + register unsigned long reg0 asm("0") = qid | 0x80000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm("2") = 0UL; + register unsigned long reg4 asm("4") = (unsigned long) msg; + register unsigned long reg5 asm("5") = (unsigned long) length; + register unsigned long reg6 asm("6") = 0UL; + register unsigned long reg7 asm("7") = 0UL; + + + asm volatile( + "0: .long 0xb2ae0064\n" /* DQAP */ + " brc 6,0b\n" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), + "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7) + : : "cc", "memory"); + *psmid = (((unsigned long long) reg6) << 32) + reg7; + return reg1; +} #endif /* _ASM_S390_AP_H_ */ diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index de023a9a88ca..bf2cbff926ef 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -2,7 +2,7 @@ /* * CPU-measurement facilities * - * Copyright IBM Corp. 2012 + * Copyright IBM Corp. 2012, 2018 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> * Jan Glauber <jang@linux.vnet.ibm.com> */ @@ -139,8 +139,14 @@ struct hws_trailer_entry { unsigned char timestamp[16]; /* 16 - 31 timestamp */ unsigned long long reserved1; /* 32 -Reserved */ unsigned long long reserved2; /* */ - unsigned long long progusage1; /* 48 - reserved for programming use */ - unsigned long long progusage2; /* */ + union { /* 48 - reserved for programming use */ + struct { + unsigned int clock_base:1; /* in progusage2 */ + unsigned long long progusage1:63; + unsigned long long progusage2; + }; + unsigned long long progusage[2]; + }; } __packed; /* Load program parameter */ diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index e07cce88dfb0..fcbd638fb9f4 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -9,6 +9,14 @@ #ifndef _ASM_S390_GMAP_H #define _ASM_S390_GMAP_H +/* Generic bits for GMAP notification on DAT table entry changes. */ +#define GMAP_NOTIFY_SHADOW 0x2 +#define GMAP_NOTIFY_MPROT 0x1 + +/* Status bits only for huge segment entries */ +#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */ +#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */ + /** * struct gmap_struct - guest address space * @list: list head for the mm->context gmap list @@ -132,4 +140,6 @@ void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *, int gmap_mprotect_notify(struct gmap *, unsigned long start, unsigned long len, int prot); +void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], + unsigned long gaddr, unsigned long vmaddr); #endif /* _ASM_S390_GMAP_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 9c5fc50204dd..2d1afa58a4b6 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -37,7 +37,10 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -#define arch_clear_hugepage_flags(page) do { } while (0) +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_arch_1, &page->flags); +} static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 5bc888841eaf..406d940173ab 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -185,7 +185,7 @@ struct lowcore { /* Transaction abort diagnostic block */ __u8 pgm_tdb[256]; /* 0x1800 */ __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ -} __packed; +} __packed __aligned(8192); #define S390_lowcore (*((struct lowcore *) 0)) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index f5ff9dbad8ac..f31a15044c24 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -24,6 +24,8 @@ typedef struct { unsigned int uses_skeys:1; /* The mmu context uses CMM. */ unsigned int uses_cmm:1; + /* The gmaps associated with this context are allowed to use huge pages. */ + unsigned int allow_gmap_hpage_1m:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d16bc79c30bb..0717ee76885d 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -32,6 +32,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.has_pgste = 0; mm->context.uses_skeys = 0; mm->context.uses_cmm = 0; + mm->context.allow_gmap_hpage_1m = 0; #endif switch (mm->context.asce_limit) { case _REGION2_SIZE: diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index a01f81186e86..123dac3717b3 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -8,7 +8,7 @@ #ifdef __ASSEMBLY__ -#ifdef CONFIG_EXPOLINE +#ifdef CC_USING_EXPOLINE _LC_BR_R1 = __LC_BR_R1 @@ -189,7 +189,7 @@ _LC_BR_R1 = __LC_BR_R1 .macro BASR_EX rsave,rtarget,ruse=%r1 basr \rsave,\rtarget .endm -#endif +#endif /* CC_USING_EXPOLINE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 94f8db468c9b..10fe982f2b4b 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -51,6 +51,10 @@ struct zpci_fmb_fmt2 { u64 max_work_units; }; +struct zpci_fmb_fmt3 { + u64 tx_bytes; +}; + struct zpci_fmb { u32 format : 8; u32 fmt_ind : 24; @@ -66,6 +70,7 @@ struct zpci_fmb { struct zpci_fmb_fmt0 fmt0; struct zpci_fmb_fmt1 fmt1; struct zpci_fmb_fmt2 fmt2; + struct zpci_fmb_fmt3 fmt3; }; } __packed __aligned(128); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5ab636089c60..0e7cb0dc9c33 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -268,8 +268,10 @@ static inline int is_module_addr(void *addr) #define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL /* Bits in the segment table entry */ -#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL -#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL +#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL +#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL +#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE 0xfffffffffff00730UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */ #define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */ @@ -1101,7 +1103,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *sptep, pte_t *tptep, pte_t pte); void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep); -bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); +bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address, + pte_t *ptep); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq); int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, @@ -1116,6 +1119,10 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long addr, int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep); int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, unsigned long *oldpte, unsigned long *oldpgste); +void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr); +void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr); +void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr); +void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h index 6090670df51f..e297bcfc476f 100644 --- a/arch/s390/include/asm/purgatory.h +++ b/arch/s390/include/asm/purgatory.h @@ -13,11 +13,5 @@ int verify_sha256_digest(void); -extern u64 kernel_entry; -extern u64 kernel_type; - -extern u64 crash_start; -extern u64 crash_size; - #endif /* __ASSEMBLY__ */ #endif /* _S390_PURGATORY_H_ */ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index de11ecc99c7c..9c9970a5dfb1 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -262,7 +262,6 @@ struct qdio_outbuf_state { void *user; }; -#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00 #define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 #define CHSC_AC1_INITIATE_INPUTQ 0x80 diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 54f81f8ed662..724faede8ac5 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,6 +4,4 @@ #include <asm-generic/sections.h> -extern char _ehead[]; - #endif diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 9c30ebe046f3..1d66016f4170 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -9,8 +9,10 @@ #include <linux/const.h> #include <uapi/asm/setup.h> - +#define EP_OFFSET 0x10008 +#define EP_STRING "S390EP" #define PARMAREA 0x10400 +#define PARMAREA_END 0x11000 /* * Machine features detected in early.c diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h index dc329aa03f76..83a574e95b3a 100644 --- a/arch/s390/include/uapi/asm/chsc.h +++ b/arch/s390/include/uapi/asm/chsc.h @@ -23,29 +23,29 @@ struct chsc_async_header { __u32 key : 4; __u32 : 28; struct subchannel_id sid; -} __attribute__ ((packed)); +}; struct chsc_async_area { struct chsc_async_header header; __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)]; -} __attribute__ ((packed)); +}; struct chsc_header { __u16 length; __u16 code; -} __attribute__ ((packed)); +}; struct chsc_sync_area { struct chsc_header header; __u8 data[CHSC_SIZE - sizeof(struct chsc_header)]; -} __attribute__ ((packed)); +}; struct chsc_response_struct { __u16 length; __u16 code; __u32 parms; __u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)]; -} __attribute__ ((packed)); +}; struct chsc_chp_cd { struct chp_id chpid; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2fed39b26b42..dbfd1730e631 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -9,39 +9,21 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) # Do not trace early setup code -CFLAGS_REMOVE_als.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_early_nobss.o = $(CC_FLAGS_FTRACE) endif -GCOV_PROFILE_als.o := n GCOV_PROFILE_early.o := n GCOV_PROFILE_early_nobss.o := n -KCOV_INSTRUMENT_als.o := n KCOV_INSTRUMENT_early.o := n KCOV_INSTRUMENT_early_nobss.o := n -UBSAN_SANITIZE_als.o := n UBSAN_SANITIZE_early.o := n UBSAN_SANITIZE_early_nobss.o := n # -# Use -march=z900 for als.c to be able to print an error -# message if the kernel is started on a machine which is too old -# -ifneq ($(CC_FLAGS_MARCH),-march=z900) -CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) -CFLAGS_REMOVE_als.o += $(CC_FLAGS_EXPOLINE) -CFLAGS_als.o += -march=z900 -AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) -AFLAGS_head.o += -march=z900 -endif - -CFLAGS_als.o += -D__NO_FORTIFY - -# # Passing null pointers is ok for smp code, since we access the lowcore here. # CFLAGS_smp.o := -Wno-nonnull @@ -61,13 +43,13 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o early_nobss.o +obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o -extra-y += head.o head64.o vmlinux.lds +extra-y += head64.o vmlinux.lds obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) @@ -99,5 +81,5 @@ obj-$(CONFIG_TRACEPOINTS) += trace.o obj-y += vdso64/ obj-$(CONFIG_COMPAT) += vdso32/ -chkbss := head.o head64.o als.o early_nobss.o +chkbss := head64.o early_nobss.o include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 9f5ea9d87069..c3620bafc374 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -306,6 +306,15 @@ static void *kzalloc_panic(int len) return rc; } +static const char *nt_name(Elf64_Word type) +{ + const char *name = "LINUX"; + + if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) + name = KEXEC_CORE_NOTE_NAME; + return name; +} + /* * Initialize ELF note */ @@ -332,11 +341,26 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) { - const char *note_name = "LINUX"; + return nt_init_name(buf, type, desc, d_len, nt_name(type)); +} + +/* + * Calculate the size of ELF note + */ +static size_t nt_size_name(int d_len, const char *name) +{ + size_t size; - if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) - note_name = KEXEC_CORE_NOTE_NAME; - return nt_init_name(buf, type, desc, d_len, note_name); + size = sizeof(Elf64_Nhdr); + size += roundup(strlen(name) + 1, 4); + size += roundup(d_len, 4); + + return size; +} + +static inline size_t nt_size(Elf64_Word type, int d_len) +{ + return nt_size_name(d_len, nt_name(type)); } /* @@ -375,6 +399,29 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) } /* + * Calculate size of ELF notes per cpu + */ +static size_t get_cpu_elf_notes_size(void) +{ + struct save_area *sa = NULL; + size_t size; + + size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); + size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); + size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); + size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); + size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); + size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); + size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + if (MACHINE_HAS_VX) { + size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); + size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + } + + return size; +} + +/* * Initialize prpsinfo note (new kernel) */ static void *nt_prpsinfo(void *ptr) @@ -429,6 +476,30 @@ static void *nt_vmcoreinfo(void *ptr) return nt_init_name(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); } +static size_t nt_vmcoreinfo_size(void) +{ + const char *name = "VMCOREINFO"; + char nt_name[11]; + Elf64_Nhdr note; + void *addr; + + if (copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return 0; + + if (copy_oldmem_kernel(¬e, addr, sizeof(note))) + return 0; + + memset(nt_name, 0, sizeof(nt_name)); + if (copy_oldmem_kernel(nt_name, addr + sizeof(note), + sizeof(nt_name) - 1)) + return 0; + + if (strcmp(nt_name, name) != 0) + return 0; + + return nt_size_name(note.n_descsz, name); +} + /* * Initialize final note (needed for /proc/vmcore code) */ @@ -539,6 +610,27 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) return ptr; } +static size_t get_elfcorehdr_size(int mem_chunk_cnt) +{ + size_t size; + + size = sizeof(Elf64_Ehdr); + /* PT_NOTES */ + size += sizeof(Elf64_Phdr); + /* nt_prpsinfo */ + size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + /* regsets */ + size += get_cpu_cnt() * get_cpu_elf_notes_size(); + /* nt_vmcoreinfo */ + size += nt_vmcoreinfo_size(); + /* nt_final */ + size += sizeof(Elf64_Nhdr); + /* PT_LOADS */ + size += mem_chunk_cnt * sizeof(Elf64_Phdr); + + return size; +} + /* * Create ELF core header (new kernel) */ @@ -566,8 +658,8 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) mem_chunk_cnt = get_mem_chunk_cnt(); - alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 + - mem_chunk_cnt * sizeof(Elf64_Phdr); + alloc_size = get_elfcorehdr_size(mem_chunk_cnt); + hdr = kzalloc_panic(alloc_size); /* Init elf header */ ptr = ehdr_init(hdr, mem_chunk_cnt); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 827699eb48fa..5b28b434f8a1 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -331,8 +331,20 @@ static void __init setup_boot_command_line(void) append_to_cmdline(append_ipl_scpdata); } +static void __init check_image_bootable(void) +{ + if (!memcmp(EP_STRING, (void *)EP_OFFSET, strlen(EP_STRING))) + return; + + sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n"); + sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n"); + sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n"); + disabled_wait(0xbadb007); +} + void __init startup_init(void) { + check_image_bootable(); time_early_init(); init_kernel_storage_key(); lockdep_off(); diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 961abfac2c5f..472fa2f1a4a5 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -83,7 +83,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user DECLARE_PER_CPU(u64, mt_cycles[8]); -void verify_facilities(void); void gs_load_bc_cb(struct pt_regs *regs); void set_fs_fixup(void); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 791cb9000e86..6d14ad42ba88 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -48,11 +48,23 @@ ENTRY(startup_continue) # Early machine initialization and detection functions. # brasl %r14,startup_init - lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space, - # virtual and never return ... + +# check control registers + stctg %c0,%c15,0(%r15) + oi 6(%r15),0x60 # enable sigp emergency & external call + oi 4(%r15),0x10 # switch on low address proctection + lctlg %c0,%c15,0(%r15) + + lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess + brasl %r14,start_kernel # go to C code +# +# We returned from start_kernel ?!? PANIK +# + basr %r13,0 + lpswe .Ldw-.(%r13) # load disabled wait psw + .align 16 .LPG1: -.Lentry:.quad 0x0000000180000000,_stext .Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table @@ -85,30 +97,5 @@ ENTRY(startup_continue) .endr .Llinkage_stack: .long 0,0,0x89000000,0,0,0,0x8a000000,0 - -ENTRY(_ehead) - - .org 0x100000 - 0x11000 # head.o ends at 0x11000 -# -# startup-code, running in absolute addressing mode -# -ENTRY(_stext) - basr %r13,0 # get base -.LPG3: -# check control registers - stctg %c0,%c15,0(%r15) - oi 6(%r15),0x60 # enable sigp emergency & external call - oi 4(%r15),0x10 # switch on low address proctection - lctlg %c0,%c15,0(%r15) - - lam 0,15,.Laregs-.LPG3(%r13) # load acrs needed by uaccess - brasl %r14,start_kernel # go to C code -# -# We returned from start_kernel ?!? PANIK -# - basr %r13,0 - lpswe .Ldw-.(%r13) # load disabled wait psw - - .align 8 .Ldw: .quad 0x0002000180000000,0x0000000000000000 .Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 18ae7b9c71d6..bdddaae96559 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -35,6 +35,8 @@ early_param("nospec", nospec_setup_early); static int __init nospec_report(void) { + if (test_facility(156)) + pr_info("Spectre V2 mitigation: etokens\n"); if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) pr_info("Spectre V2 mitigation: execute trampolines\n"); if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) @@ -56,7 +58,15 @@ early_param("nospectre_v2", nospectre_v2_setup_early); void __init nospec_auto_detect(void) { - if (IS_ENABLED(CC_USING_EXPOLINE)) { + if (test_facility(156)) { + /* + * The machine supports etokens. + * Disable expolines and disable nobp. + */ + if (IS_ENABLED(CC_USING_EXPOLINE)) + nospec_disable = 1; + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + } else if (IS_ENABLED(CC_USING_EXPOLINE)) { /* * The kernel has been compiled with expolines. * Keep expolines enabled and disable nobp. diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c index 8affad5f18cb..e30e580ae362 100644 --- a/arch/s390/kernel/nospec-sysfs.c +++ b/arch/s390/kernel/nospec-sysfs.c @@ -13,6 +13,8 @@ ssize_t cpu_show_spectre_v1(struct device *dev, ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { + if (test_facility(156)) + return sprintf(buf, "Mitigation: etokens\n"); if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) return sprintf(buf, "Mitigation: execute trampolines\n"); if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0292d68e7dde..cb198d4a6dca 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -2,7 +2,7 @@ /* * Performance event support for the System z CPU-measurement Sampling Facility * - * Copyright IBM Corp. 2013 + * Copyright IBM Corp. 2013, 2018 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ #define KMSG_COMPONENT "cpum_sf" @@ -1587,6 +1587,17 @@ static void aux_buffer_free(void *data) "%lu SDBTs\n", num_sdbt); } +static void aux_sdb_init(unsigned long sdb) +{ + struct hws_trailer_entry *te; + + te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + + /* Save clock base */ + te->clock_base = 1; + memcpy(&te->progusage2, &tod_clock_base[1], 8); +} + /* * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling * @cpu: On which to allocate, -1 means current @@ -1666,6 +1677,7 @@ static void *aux_buffer_setup(int cpu, void **pages, int nr_pages, /* Tail is the entry in a SDBT */ *tail = (unsigned long)pages[i]; aux->sdb_index[i] = (unsigned long)pages[i]; + aux_sdb_init((unsigned long)pages[i]); } sfb->num_sdb = nr_pages; diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index 54e2d634b849..4352a504f235 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -12,9 +12,6 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) { freg_t fp; - if (WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX)) - return 0; - if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15) return regs->gprs[idx]; @@ -33,7 +30,8 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) if (idx == PERF_REG_S390_PC) return regs->psw.addr; - return regs->gprs[idx]; + WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX); + return 0; } #define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1)) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d82a9ec64ea9..c637c12f9e37 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -674,12 +674,12 @@ static void __init reserve_kernel(void) #ifdef CONFIG_DMA_API_DEBUG /* * DMA_API_DEBUG code stumbles over addresses from the - * range [_ehead, _stext]. Mark the memory as reserved + * range [PARMAREA_END, _stext]. Mark the memory as reserved * so it is not used for CONFIG_DMA_API_DEBUG=y. */ memblock_reserve(0, PFN_PHYS(start_pfn)); #else - memblock_reserve(0, (unsigned long)_ehead); + memblock_reserve(0, PARMAREA_END); memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - (unsigned long)_stext); #endif diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 54f5496913fa..12f80d1f0415 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -59,6 +59,8 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2) } EXPORT_SYMBOL(stsi); +#ifdef CONFIG_PROC_FS + static bool convert_ext_name(unsigned char encoding, char *name, size_t len) { switch (encoding) { @@ -301,6 +303,8 @@ static int __init sysinfo_create_proc(void) } device_initcall(sysinfo_create_proc); +#endif /* CONFIG_PROC_FS */ + /* * Service levels interface. */ diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 4b6e0397f66d..e8184a15578a 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -579,41 +579,33 @@ early_param("topology", topology_setup); static int topology_ctl_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int len; + int enabled = topology_is_enabled(); int new_mode; - char buf[2]; + int zero = 0; + int one = 1; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &enabled, + .maxlen = sizeof(int), + .extra1 = &zero, + .extra2 = &one, + }; + + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; - if (!*lenp || *ppos) { - *lenp = 0; - return 0; - } - if (!write) { - strncpy(buf, topology_is_enabled() ? "1\n" : "0\n", - ARRAY_SIZE(buf)); - len = strnlen(buf, ARRAY_SIZE(buf)); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; - if (buf[0] != '0' && buf[0] != '1') - return -EINVAL; mutex_lock(&smp_cpu_state_mutex); - new_mode = topology_get_mode(buf[0] == '1'); + new_mode = topology_get_mode(enabled); if (topology_mode != new_mode) { topology_mode = new_mode; topology_schedule_update(); } mutex_unlock(&smp_cpu_state_mutex); topology_flush_work(); -out: - *lenp = len; - *ppos += len; - return 0; + + return rc; } static struct ctl_table topology_ctl_table[] = { diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 09abae40f917..3031cc6dd0ab 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -47,7 +47,7 @@ static struct page **vdso64_pagelist; */ unsigned int __read_mostly vdso_enabled = 1; -static int vdso_fault(const struct vm_special_mapping *sm, +static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { struct page **vdso_pagelist; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index f0414f52817b..b43f8d33a369 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -19,7 +19,7 @@ OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) -ENTRY(startup) +ENTRY(startup_continue) jiffies = jiffies_64; PHDRS { @@ -30,16 +30,12 @@ PHDRS { SECTIONS { - . = 0x00000000; + . = 0x100000; + _stext = .; /* Start of text section */ .text : { /* Text and read-only data */ + _text = .; HEAD_TEXT - /* - * E.g. perf doesn't like symbols starting at address zero, - * therefore skip the initial PSW and channel program located - * at address zero and let _text start at 0x200. - */ - _text = 0x200; TEXT_TEXT SCHED_TEXT CPUIDLE_TEXT @@ -47,6 +43,7 @@ SECTIONS KPROBES_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT + *(.text.*_indirect_*) *(.fixup) *(.gnu.warning) } :text = 0x0700 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3b7a5151b6a5..f9d90337e64a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -172,6 +172,10 @@ static int nested; module_param(nested, int, S_IRUGO); MODULE_PARM_DESC(nested, "Nested virtualization support"); +/* allow 1m huge page guest backing, if !nested */ +static int hpage; +module_param(hpage, int, 0444); +MODULE_PARM_DESC(hpage, "1m huge page backing support"); /* * For now we handle at most 16 double words as this is what the s390 base @@ -475,6 +479,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_AIS_MIGRATION: r = 1; break; + case KVM_CAP_S390_HPAGE_1M: + r = 0; + if (hpage) + r = 1; + break; case KVM_CAP_S390_MEM_OP: r = MEM_OP_MAX_SIZE; break; @@ -511,19 +520,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) } static void kvm_s390_sync_dirty_log(struct kvm *kvm, - struct kvm_memory_slot *memslot) + struct kvm_memory_slot *memslot) { + int i; gfn_t cur_gfn, last_gfn; - unsigned long address; + unsigned long gaddr, vmaddr; struct gmap *gmap = kvm->arch.gmap; + DECLARE_BITMAP(bitmap, _PAGE_ENTRIES); - /* Loop over all guest pages */ + /* Loop over all guest segments */ + cur_gfn = memslot->base_gfn; last_gfn = memslot->base_gfn + memslot->npages; - for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { - address = gfn_to_hva_memslot(memslot, cur_gfn); + for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) { + gaddr = gfn_to_gpa(cur_gfn); + vmaddr = gfn_to_hva_memslot(memslot, cur_gfn); + if (kvm_is_error_hva(vmaddr)) + continue; + + bitmap_zero(bitmap, _PAGE_ENTRIES); + gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr); + for (i = 0; i < _PAGE_ENTRIES; i++) { + if (test_bit(i, bitmap)) + mark_page_dirty(kvm, cur_gfn + i); + } - if (test_and_clear_guest_dirty(gmap->mm, address)) - mark_page_dirty(kvm, cur_gfn); if (fatal_signal_pending(current)) return; cond_resched(); @@ -667,6 +687,27 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_HPAGE_1M: + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) + r = -EBUSY; + else if (!hpage || kvm->arch.use_cmma) + r = -EINVAL; + else { + r = 0; + kvm->mm->context.allow_gmap_hpage_1m = 1; + /* + * We might have to create fake 4k page + * tables. To avoid that the hardware works on + * stale PGSTEs, we emulate these instructions. + */ + kvm->arch.use_skf = 0; + kvm->arch.use_pfmfi = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", + r ? "(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -714,10 +755,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (!sclp.has_cmma) break; - ret = -EBUSY; VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); mutex_lock(&kvm->lock); - if (!kvm->created_vcpus) { + if (kvm->created_vcpus) + ret = -EBUSY; + else if (kvm->mm->context.allow_gmap_hpage_1m) + ret = -EINVAL; + else { kvm->arch.use_cmma = 1; /* Not compatible with cmma. */ kvm->arch.use_pfmfi = 0; @@ -1540,6 +1584,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) uint8_t *keys; uint64_t hva; int srcu_idx, i, r = 0; + bool unlocked; if (args->flags != 0) return -EINVAL; @@ -1564,9 +1609,11 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (r) goto out; + i = 0; down_read(¤t->mm->mmap_sem); srcu_idx = srcu_read_lock(&kvm->srcu); - for (i = 0; i < args->count; i++) { + while (i < args->count) { + unlocked = false; hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) { r = -EFAULT; @@ -1580,8 +1627,14 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) } r = set_guest_storage_key(current->mm, hva, keys[i], 0); - if (r) - break; + if (r) { + r = fixup_user_fault(current, current->mm, hva, + FAULT_FLAG_WRITE, &unlocked); + if (r) + break; + } + if (!r) + i++; } srcu_read_unlock(&kvm->srcu, srcu_idx); up_read(¤t->mm->mmap_sem); @@ -4082,6 +4135,11 @@ static int __init kvm_s390_init(void) return -ENODEV; } + if (nested && hpage) { + pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently"); + return -EINVAL; + } + for (i = 0; i < 16; i++) kvm_s390_fac_base[i] |= S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index eb0eb60c7be6..cfc5a62329f6 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -246,9 +246,10 @@ static int try_handle_skey(struct kvm_vcpu *vcpu) static int handle_iske(struct kvm_vcpu *vcpu) { - unsigned long addr; + unsigned long gaddr, vmaddr; unsigned char key; int reg1, reg2; + bool unlocked; int rc; vcpu->stat.instruction_iske++; @@ -262,18 +263,28 @@ static int handle_iske(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); - addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; - addr = kvm_s390_logical_to_effective(vcpu, addr); - addr = kvm_s390_real_to_abs(vcpu, addr); - addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); - if (kvm_is_error_hva(addr)) + gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + gaddr = kvm_s390_logical_to_effective(vcpu, gaddr); + gaddr = kvm_s390_real_to_abs(vcpu, gaddr); + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - +retry: + unlocked = false; down_read(¤t->mm->mmap_sem); - rc = get_guest_storage_key(current->mm, addr, &key); - up_read(¤t->mm->mmap_sem); + rc = get_guest_storage_key(current->mm, vmaddr, &key); + + if (rc) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + if (!rc) { + up_read(¤t->mm->mmap_sem); + goto retry; + } + } if (rc) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + up_read(¤t->mm->mmap_sem); vcpu->run->s.regs.gprs[reg1] &= ~0xff; vcpu->run->s.regs.gprs[reg1] |= key; return 0; @@ -281,8 +292,9 @@ static int handle_iske(struct kvm_vcpu *vcpu) static int handle_rrbe(struct kvm_vcpu *vcpu) { - unsigned long addr; + unsigned long vmaddr, gaddr; int reg1, reg2; + bool unlocked; int rc; vcpu->stat.instruction_rrbe++; @@ -296,19 +308,27 @@ static int handle_rrbe(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); - addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; - addr = kvm_s390_logical_to_effective(vcpu, addr); - addr = kvm_s390_real_to_abs(vcpu, addr); - addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); - if (kvm_is_error_hva(addr)) + gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + gaddr = kvm_s390_logical_to_effective(vcpu, gaddr); + gaddr = kvm_s390_real_to_abs(vcpu, gaddr); + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - +retry: + unlocked = false; down_read(¤t->mm->mmap_sem); - rc = reset_guest_reference_bit(current->mm, addr); - up_read(¤t->mm->mmap_sem); + rc = reset_guest_reference_bit(current->mm, vmaddr); + if (rc < 0) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + if (!rc) { + up_read(¤t->mm->mmap_sem); + goto retry; + } + } if (rc < 0) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - + up_read(¤t->mm->mmap_sem); kvm_s390_set_psw_cc(vcpu, rc); return 0; } @@ -323,6 +343,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) unsigned long start, end; unsigned char key, oldkey; int reg1, reg2; + bool unlocked; int rc; vcpu->stat.instruction_sske++; @@ -355,19 +376,28 @@ static int handle_sske(struct kvm_vcpu *vcpu) } while (start != end) { - unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + unlocked = false; - if (kvm_is_error_hva(addr)) + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); down_read(¤t->mm->mmap_sem); - rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey, + rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey, m3 & SSKE_NQ, m3 & SSKE_MR, m3 & SSKE_MC); - up_read(¤t->mm->mmap_sem); - if (rc < 0) + + if (rc < 0) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + rc = !rc ? -EAGAIN : rc; + } + if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - start += PAGE_SIZE; + + up_read(¤t->mm->mmap_sem); + if (rc >= 0) + start += PAGE_SIZE; } if (m3 & (SSKE_MC | SSKE_MR)) { @@ -948,15 +978,16 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) } while (start != end) { - unsigned long useraddr; + unsigned long vmaddr; + bool unlocked = false; /* Translate guest address to host address */ - useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); - if (kvm_is_error_hva(useraddr)) + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (clear_user((void __user *)useraddr, PAGE_SIZE)) + if (clear_user((void __user *)vmaddr, PAGE_SIZE)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } @@ -966,14 +997,20 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (rc) return rc; down_read(¤t->mm->mmap_sem); - rc = cond_set_guest_storage_key(current->mm, useraddr, + rc = cond_set_guest_storage_key(current->mm, vmaddr, key, NULL, nq, mr, mc); - up_read(¤t->mm->mmap_sem); - if (rc < 0) + if (rc < 0) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + rc = !rc ? -EAGAIN : rc; + } + if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } - start += PAGE_SIZE; + up_read(¤t->mm->mmap_sem); + if (rc >= 0) + start += PAGE_SIZE; + } } if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) { diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 2311f15be9cf..40c4d59c926e 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -17,7 +17,7 @@ ENTRY(memmove) ltgr %r4,%r4 lgr %r1,%r2 - bzr %r14 + jz .Lmemmove_exit aghi %r4,-1 clgr %r2,%r3 jnh .Lmemmove_forward @@ -36,6 +36,7 @@ ENTRY(memmove) .Lmemmove_forward_remainder: larl %r5,.Lmemmove_mvc ex %r4,0(%r5) +.Lmemmove_exit: BR_EX %r14 .Lmemmove_reverse: ic %r0,0(%r4,%r3) @@ -65,7 +66,7 @@ EXPORT_SYMBOL(memmove) */ ENTRY(memset) ltgr %r4,%r4 - bzr %r14 + jz .Lmemset_exit ltgr %r3,%r3 jnz .Lmemset_fill aghi %r4,-1 @@ -80,6 +81,7 @@ ENTRY(memset) .Lmemset_clear_remainder: larl %r3,.Lmemset_xc ex %r4,0(%r3) +.Lmemset_exit: BR_EX %r14 .Lmemset_fill: cghi %r4,1 @@ -115,7 +117,7 @@ EXPORT_SYMBOL(memset) */ ENTRY(memcpy) ltgr %r4,%r4 - bzr %r14 + jz .Lmemcpy_exit aghi %r4,-1 srlg %r5,%r4,8 ltgr %r5,%r5 @@ -124,6 +126,7 @@ ENTRY(memcpy) .Lmemcpy_remainder: larl %r5,.Lmemcpy_mvc ex %r4,0(%r5) +.Lmemcpy_exit: BR_EX %r14 .Lmemcpy_loop: mvc 0(256,%r1),0(%r3) @@ -145,9 +148,9 @@ EXPORT_SYMBOL(memcpy) .macro __MEMSET bits,bytes,insn ENTRY(__memset\bits) ltgr %r4,%r4 - bzr %r14 + jz .L__memset_exit\bits cghi %r4,\bytes - je .L__memset_exit\bits + je .L__memset_store\bits aghi %r4,-(\bytes+1) srlg %r5,%r4,8 ltgr %r5,%r5 @@ -163,8 +166,9 @@ ENTRY(__memset\bits) larl %r5,.L__memset_mvc\bits ex %r4,0(%r5) BR_EX %r14 -.L__memset_exit\bits: +.L__memset_store\bits: \insn %r3,0(%r2) +.L__memset_exit\bits: BR_EX %r14 .L__memset_mvc\bits: mvc \bytes(1,%r1),0(%r1) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 6cf024eb2085..510a18299196 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -191,12 +191,7 @@ static void cmm_set_timer(void) del_timer(&cmm_timer); return; } - if (timer_pending(&cmm_timer)) { - if (mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds*HZ)) - return; - } - cmm_timer.expires = jiffies + cmm_timeout_seconds*HZ; - add_timer(&cmm_timer); + mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ); } static void cmm_timer_fn(struct timer_list *unused) @@ -251,45 +246,42 @@ static int cmm_skip_blanks(char *cp, char **endp) return str != cp; } -static struct ctl_table cmm_table[]; - static int cmm_pages_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - char buf[16], *p; - unsigned int len; - long nr; + long nr = cmm_get_pages(); + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &nr, + .maxlen = sizeof(long), + }; + int rc; - if (!*lenp || (*ppos && !write)) { - *lenp = 0; - return 0; - } + rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; - if (write) { - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; - cmm_skip_blanks(buf, &p); - nr = simple_strtoul(p, &p, 0); - if (ctl == &cmm_table[0]) - cmm_set_pages(nr); - else - cmm_add_timed_pages(nr); - } else { - if (ctl == &cmm_table[0]) - nr = cmm_get_pages(); - else - nr = cmm_get_timed_pages(); - len = sprintf(buf, "%ld\n", nr); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - } - *lenp = len; - *ppos += len; + cmm_set_pages(nr); + return 0; +} + +static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + long nr = cmm_get_timed_pages(); + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &nr, + .maxlen = sizeof(long), + }; + int rc; + + rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; + + cmm_add_timed_pages(nr); return 0; } @@ -338,7 +330,7 @@ static struct ctl_table cmm_table[] = { { .procname = "cmm_timed_pages", .mode = 0644, - .proc_handler = cmm_pages_handler, + .proc_handler = cmm_timed_pages_handler, }, { .procname = "cmm_timeout", diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 6ad15d3fab81..84111a43ea29 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -80,7 +80,7 @@ struct qin64 { struct dcss_segment { struct list_head list; char dcss_name[8]; - char res_name[15]; + char res_name[16]; unsigned long start_addr; unsigned long end; atomic_t ref_count; @@ -433,7 +433,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long memcpy(&seg->res_name, seg->dcss_name, 8); EBCASC(seg->res_name, 8); seg->res_name[8] = '\0'; - strncat(seg->res_name, " (DCSS)", 7); + strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name)); seg->res->name = seg->res_name; rc = seg->vm_segtype; if (rc == SEG_TYPE_SC || diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e074480d3598..4cc3f06b0ab3 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -502,6 +502,8 @@ retry: /* No reason to continue if interrupted by SIGKILL. */ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { fault = VM_FAULT_SIGNAL; + if (flags & FAULT_FLAG_RETRY_NOWAIT) + goto out_up; goto out; } if (unlikely(fault & VM_FAULT_ERROR)) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index bc56ec8abcf7..bb44990c8212 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2,8 +2,10 @@ /* * KVM guest address space mapping code * - * Copyright IBM Corp. 2007, 2016 + * Copyright IBM Corp. 2007, 2016, 2018 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + * David Hildenbrand <david@redhat.com> + * Janosch Frank <frankja@linux.vnet.ibm.com> */ #include <linux/kernel.h> @@ -521,6 +523,9 @@ void gmap_unlink(struct mm_struct *mm, unsigned long *table, rcu_read_unlock(); } +static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new, + unsigned long gaddr); + /** * gmap_link - set up shadow page tables to connect a host to a guest address * @gmap: pointer to guest mapping meta data structure @@ -541,6 +546,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) p4d_t *p4d; pud_t *pud; pmd_t *pmd; + u64 unprot; int rc; BUG_ON(gmap_is_shadow(gmap)); @@ -584,8 +590,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) return -EFAULT; pmd = pmd_offset(pud, vmaddr); VM_BUG_ON(pmd_none(*pmd)); - /* large pmds cannot yet be handled */ - if (pmd_large(*pmd)) + /* Are we allowed to use huge pages? */ + if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m) return -EFAULT; /* Link gmap segment table entry location to page table. */ rc = radix_tree_preload(GFP_KERNEL); @@ -596,10 +602,22 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) if (*table == _SEGMENT_ENTRY_EMPTY) { rc = radix_tree_insert(&gmap->host_to_guest, vmaddr >> PMD_SHIFT, table); - if (!rc) - *table = pmd_val(*pmd); - } else - rc = 0; + if (!rc) { + if (pmd_large(*pmd)) { + *table = (pmd_val(*pmd) & + _SEGMENT_ENTRY_HARDWARE_BITS_LARGE) + | _SEGMENT_ENTRY_GMAP_UC; + } else + *table = pmd_val(*pmd) & + _SEGMENT_ENTRY_HARDWARE_BITS; + } + } else if (*table & _SEGMENT_ENTRY_PROTECT && + !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) { + unprot = (u64)*table; + unprot &= ~_SEGMENT_ENTRY_PROTECT; + unprot |= _SEGMENT_ENTRY_GMAP_UC; + gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr); + } spin_unlock(&gmap->guest_table_lock); spin_unlock(ptl); radix_tree_preload_end(); @@ -690,6 +708,12 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) vmaddr |= gaddr & ~PMD_MASK; /* Find vma in the parent mm */ vma = find_vma(gmap->mm, vmaddr); + /* + * We do not discard pages that are backed by + * hugetlbfs, so we don't have to refault them. + */ + if (vma && is_vm_hugetlb_page(vma)) + continue; size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); zap_page_range(vma, vmaddr, size); } @@ -864,7 +888,128 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, */ static void gmap_pte_op_end(spinlock_t *ptl) { - spin_unlock(ptl); + if (ptl) + spin_unlock(ptl); +} + +/** + * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock + * and return the pmd pointer + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * + * Returns a pointer to the pmd for a guest address, or NULL + */ +static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr) +{ + pmd_t *pmdp; + + BUG_ON(gmap_is_shadow(gmap)); + spin_lock(&gmap->guest_table_lock); + pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1); + + if (!pmdp || pmd_none(*pmdp)) { + spin_unlock(&gmap->guest_table_lock); + return NULL; + } + + /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */ + if (!pmd_large(*pmdp)) + spin_unlock(&gmap->guest_table_lock); + return pmdp; +} + +/** + * gmap_pmd_op_end - release the guest_table_lock if needed + * @gmap: pointer to the guest mapping meta data structure + * @pmdp: pointer to the pmd + */ +static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp) +{ + if (pmd_large(*pmdp)) + spin_unlock(&gmap->guest_table_lock); +} + +/* + * gmap_protect_pmd - remove access rights to memory and set pmd notification bits + * @pmdp: pointer to the pmd to be protected + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: notification bits to set + * + * Returns: + * 0 if successfully protected + * -EAGAIN if a fixup is needed + * -EINVAL if unsupported notifier bits have been specified + * + * Expected to be called with sg->mm->mmap_sem in read and + * guest_table_lock held. + */ +static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, + pmd_t *pmdp, int prot, unsigned long bits) +{ + int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID; + int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT; + pmd_t new = *pmdp; + + /* Fixup needed */ + if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE))) + return -EAGAIN; + + if (prot == PROT_NONE && !pmd_i) { + pmd_val(new) |= _SEGMENT_ENTRY_INVALID; + gmap_pmdp_xchg(gmap, pmdp, new, gaddr); + } + + if (prot == PROT_READ && !pmd_p) { + pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID; + pmd_val(new) |= _SEGMENT_ENTRY_PROTECT; + gmap_pmdp_xchg(gmap, pmdp, new, gaddr); + } + + if (bits & GMAP_NOTIFY_MPROT) + pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN; + + /* Shadow GMAP protection needs split PMDs */ + if (bits & GMAP_NOTIFY_SHADOW) + return -EINVAL; + + return 0; +} + +/* + * gmap_protect_pte - remove access rights to memory and set pgste bits + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @pmdp: pointer to the pmd associated with the pte + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: notification bits to set + * + * Returns 0 if successfully protected, -ENOMEM if out of memory and + * -EAGAIN if a fixup is needed. + * + * Expected to be called with sg->mm->mmap_sem in read + */ +static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, + pmd_t *pmdp, int prot, unsigned long bits) +{ + int rc; + pte_t *ptep; + spinlock_t *ptl = NULL; + unsigned long pbits = 0; + + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return -EAGAIN; + + ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl); + if (!ptep) + return -ENOMEM; + + pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0; + pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0; + /* Protect and unlock. */ + rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits); + gmap_pte_op_end(ptl); + return rc; } /* @@ -883,30 +1028,45 @@ static void gmap_pte_op_end(spinlock_t *ptl) static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, unsigned long len, int prot, unsigned long bits) { - unsigned long vmaddr; - spinlock_t *ptl; - pte_t *ptep; + unsigned long vmaddr, dist; + pmd_t *pmdp; int rc; BUG_ON(gmap_is_shadow(gmap)); while (len) { rc = -EAGAIN; - ptep = gmap_pte_op_walk(gmap, gaddr, &ptl); - if (ptep) { - rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits); - gmap_pte_op_end(ptl); + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (pmdp) { + if (!pmd_large(*pmdp)) { + rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, + bits); + if (!rc) { + len -= PAGE_SIZE; + gaddr += PAGE_SIZE; + } + } else { + rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, + bits); + if (!rc) { + dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK); + len = len < dist ? 0 : len - dist; + gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE; + } + } + gmap_pmd_op_end(gmap, pmdp); } if (rc) { + if (rc == -EINVAL) + return rc; + + /* -EAGAIN, fixup of userspace mm and gmap */ vmaddr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(vmaddr)) return vmaddr; rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot); if (rc) return rc; - continue; } - gaddr += PAGE_SIZE; - len -= PAGE_SIZE; } return 0; } @@ -935,7 +1095,7 @@ int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, if (!MACHINE_HAS_ESOP && prot == PROT_READ) return -EINVAL; down_read(&gmap->mm->mmap_sem); - rc = gmap_protect_range(gmap, gaddr, len, prot, PGSTE_IN_BIT); + rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT); up_read(&gmap->mm->mmap_sem); return rc; } @@ -1474,6 +1634,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, unsigned long limit; int rc; + BUG_ON(parent->mm->context.allow_gmap_hpage_1m); BUG_ON(gmap_is_shadow(parent)); spin_lock(&parent->shadow_lock); sg = gmap_find_shadow(parent, asce, edat_level); @@ -1526,7 +1687,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, down_read(&parent->mm->mmap_sem); rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, - PROT_READ, PGSTE_VSIE_BIT); + PROT_READ, GMAP_NOTIFY_SHADOW); up_read(&parent->mm->mmap_sem); spin_lock(&parent->shadow_lock); new->initialized = true; @@ -2092,6 +2253,225 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, } EXPORT_SYMBOL_GPL(ptep_notify); +static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp, + unsigned long gaddr) +{ + pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN; + gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1); +} + +/** + * gmap_pmdp_xchg - exchange a gmap pmd with another + * @gmap: pointer to the guest address space structure + * @pmdp: pointer to the pmd entry + * @new: replacement entry + * @gaddr: the affected guest address + * + * This function is assumed to be called with the guest_table_lock + * held. + */ +static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, + unsigned long gaddr) +{ + gaddr &= HPAGE_MASK; + pmdp_notify_gmap(gmap, pmdp, gaddr); + pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN; + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, + IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); + else + __pmdp_csp(pmdp); + *pmdp = new; +} + +static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, + int purge) +{ + pmd_t *pmdp; + struct gmap *gmap; + unsigned long gaddr; + + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + spin_lock(&gmap->guest_table_lock); + pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (pmdp) { + gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); + pmdp_notify_gmap(gmap, pmdp, gaddr); + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC)); + if (purge) + __pmdp_csp(pmdp); + pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + } + spin_unlock(&gmap->guest_table_lock); + } + rcu_read_unlock(); +} + +/** + * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without + * flushing + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr) +{ + gmap_pmdp_clear(mm, vmaddr, 0); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate); + +/** + * gmap_pmdp_csp - csp all affected guest pmd entries + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr) +{ + gmap_pmdp_clear(mm, vmaddr, 1); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_csp); + +/** + * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) +{ + unsigned long *entry, gaddr; + struct gmap *gmap; + pmd_t *pmdp; + + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (entry) { + pmdp = (pmd_t *)entry; + gaddr = __gmap_segment_gaddr(entry); + pmdp_notify_gmap(gmap, pmdp, gaddr); + WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC)); + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, + gmap->asce, IDTE_LOCAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); + *entry = _SEGMENT_ENTRY_EMPTY; + } + spin_unlock(&gmap->guest_table_lock); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local); + +/** + * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) +{ + unsigned long *entry, gaddr; + struct gmap *gmap; + pmd_t *pmdp; + + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (entry) { + pmdp = (pmd_t *)entry; + gaddr = __gmap_segment_gaddr(entry); + pmdp_notify_gmap(gmap, pmdp, gaddr); + WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC)); + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, + gmap->asce, IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); + else + __pmdp_csp(pmdp); + *entry = _SEGMENT_ENTRY_EMPTY; + } + spin_unlock(&gmap->guest_table_lock); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global); + +/** + * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status + * @gmap: pointer to guest address space + * @pmdp: pointer to the pmd to be tested + * @gaddr: virtual address in the guest address space + * + * This function is assumed to be called with the guest_table_lock + * held. + */ +bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp, + unsigned long gaddr) +{ + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return false; + + /* Already protected memory, which did not change is clean */ + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT && + !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC)) + return false; + + /* Clear UC indication and reset protection */ + pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC; + gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0); + return true; +} + +/** + * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment + * @gmap: pointer to guest address space + * @bitmap: dirty bitmap for this pmd + * @gaddr: virtual address in the guest address space + * @vmaddr: virtual address in the host address space + * + * This function is assumed to be called with the guest_table_lock + * held. + */ +void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], + unsigned long gaddr, unsigned long vmaddr) +{ + int i; + pmd_t *pmdp; + pte_t *ptep; + spinlock_t *ptl; + + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (!pmdp) + return; + + if (pmd_large(*pmdp)) { + if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr)) + bitmap_fill(bitmap, _PAGE_ENTRIES); + } else { + for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) { + ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl); + if (!ptep) + continue; + if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep)) + set_bit(i, bitmap); + spin_unlock(ptl); + } + } + gmap_pmd_op_end(gmap, pmdp); +} +EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd); + static inline void thp_split_mm(struct mm_struct *mm) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -2168,17 +2548,45 @@ EXPORT_SYMBOL_GPL(s390_enable_sie); * Enable storage key handling from now on and initialize the storage * keys with the default key. */ -static int __s390_enable_skey(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) +static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) { /* Clear storage key */ ptep_zap_key(walk->mm, addr, pte); return 0; } +static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, + unsigned long hmask, unsigned long next, + struct mm_walk *walk) +{ + pmd_t *pmd = (pmd_t *)pte; + unsigned long start, end; + struct page *page = pmd_page(*pmd); + + /* + * The write check makes sure we do not set a key on shared + * memory. This is needed as the walker does not differentiate + * between actual guest memory and the process executable or + * shared libraries. + */ + if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID || + !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE)) + return 0; + + start = pmd_val(*pmd) & HPAGE_MASK; + end = start + HPAGE_SIZE - 1; + __storage_key_init_range(start, end); + set_bit(PG_arch_1, &page->flags); + return 0; +} + int s390_enable_skey(void) { - struct mm_walk walk = { .pte_entry = __s390_enable_skey }; + struct mm_walk walk = { + .hugetlb_entry = __s390_enable_skey_hugetlb, + .pte_entry = __s390_enable_skey_pte, + }; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int rc = 0; diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index e804090f4470..b0246c705a19 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -123,6 +123,29 @@ static inline pte_t __rste_to_pte(unsigned long rste) return pte; } +static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) +{ + struct page *page; + unsigned long size, paddr; + + if (!mm_uses_skeys(mm) || + rste & _SEGMENT_ENTRY_INVALID) + return; + + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { + page = pud_page(__pud(rste)); + size = PUD_SIZE; + paddr = rste & PUD_MASK; + } else { + page = pmd_page(__pmd(rste)); + size = PMD_SIZE; + paddr = rste & PMD_MASK; + } + + if (!test_and_set_bit(PG_arch_1, &page->flags)) + __storage_key_init_range(paddr, paddr + size - 1); +} + void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { @@ -137,6 +160,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; else rste |= _SEGMENT_ENTRY_LARGE; + clear_huge_pte_skeys(mm, rste); pte_val(*ptep) = rste; } diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 382153ff17e3..dc3cede7f2ec 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -271,7 +271,7 @@ void arch_set_page_states(int make_stable) list_for_each(l, &zone->free_area[order].free_list[t]) { page = list_entry(l, struct page, lru); if (make_stable) - set_page_stable_dat(page, 0); + set_page_stable_dat(page, order); else set_page_unused(page, order); } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index c44171588d08..f8c6faab41f4 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -14,7 +14,7 @@ static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) { - asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0" + asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0" : [addr] "+a" (addr) : [skey] "d" (skey)); return addr; } @@ -23,8 +23,6 @@ void __storage_key_init_range(unsigned long start, unsigned long end) { unsigned long boundary, size; - if (!PAGE_DEFAULT_KEY) - return; while (start < end) { if (MACHINE_HAS_EDAT1) { /* set storage keys for a 1MB frame */ @@ -37,7 +35,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end) continue; } } - page_set_storage_key(start, PAGE_DEFAULT_KEY, 0); + page_set_storage_key(start, PAGE_DEFAULT_KEY, 1); start += PAGE_SIZE; } } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index e3bd5627afef..76d89ee8b428 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -28,7 +28,7 @@ static struct ctl_table page_table_sysctl[] = { .data = &page_table_allocate_pgste, .maxlen = sizeof(int), .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &page_table_allocate_pgste_min, .extra2 = &page_table_allocate_pgste_max, }, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 301e466e4263..f2cc7da473e4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -347,18 +347,27 @@ static inline void pmdp_idte_local(struct mm_struct *mm, mm->context.asce, IDTE_LOCAL); else __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL); + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) + gmap_pmdp_idte_local(mm, addr); } static inline void pmdp_idte_global(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) + if (MACHINE_HAS_TLB_GUEST) { __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) + gmap_pmdp_idte_global(mm, addr); + } else if (MACHINE_HAS_IDTE) { __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); - else + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) + gmap_pmdp_idte_global(mm, addr); + } else { __pmdp_csp(pmdp); + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) + gmap_pmdp_csp(mm, addr); + } } static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, @@ -392,6 +401,8 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + gmap_pmdp_invalidate(mm, addr); } else { pmdp_idte_global(mm, addr, pmdp); } @@ -399,6 +410,24 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, return old; } +static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return NULL; + pmd = pmd_alloc(mm, pud, addr); + return pmd; +} + pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t new) { @@ -693,40 +722,14 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) /* * Test and reset if a guest page is dirty */ -bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) +bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - spinlock_t *ptl; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; pgste_t pgste; - pte_t *ptep; pte_t pte; bool dirty; int nodat; - pgd = pgd_offset(mm, addr); - p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return false; - pud = pud_alloc(mm, p4d, addr); - if (!pud) - return false; - pmd = pmd_alloc(mm, pud, addr); - if (!pmd) - return false; - /* We can't run guests backed by huge pages, but userspace can - * still set them up and then try to migrate them without any - * migration support. - */ - if (pmd_large(*pmd)) - return true; - - ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); - if (unlikely(!ptep)) - return false; - pgste = pgste_get_lock(ptep); dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); pgste_val(pgste) &= ~PGSTE_UC_BIT; @@ -742,21 +745,43 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) *ptep = pte; } pgste_set_unlock(ptep, pgste); - - spin_unlock(ptl); return dirty; } -EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty); +EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq) { - unsigned long keyul; + unsigned long keyul, paddr; spinlock_t *ptl; pgste_t old, new; + pmd_t *pmdp; pte_t *ptep; - ptep = get_locked_pte(mm, addr, &ptl); + pmdp = pmd_alloc_map(mm, addr); + if (unlikely(!pmdp)) + return -EFAULT; + + ptl = pmd_lock(mm, pmdp); + if (!pmd_present(*pmdp)) { + spin_unlock(ptl); + return -EFAULT; + } + + if (pmd_large(*pmdp)) { + paddr = pmd_val(*pmdp) & HPAGE_MASK; + paddr |= addr & ~HPAGE_MASK; + /* + * Huge pmds need quiescing operations, they are + * always mapped. + */ + page_set_storage_key(paddr, key, 1); + spin_unlock(ptl); + return 0; + } + spin_unlock(ptl); + + ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -767,14 +792,14 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; if (!(pte_val(*ptep) & _PAGE_INVALID)) { - unsigned long address, bits, skey; + unsigned long bits, skey; - address = pte_val(*ptep) & PAGE_MASK; - skey = (unsigned long) page_get_storage_key(address); + paddr = pte_val(*ptep) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(paddr); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set storage key ACC and FP */ - page_set_storage_key(address, skey, !nq); + page_set_storage_key(paddr, skey, !nq); /* Merge host changed & referenced into pgste */ pgste_val(new) |= bits << 52; } @@ -830,11 +855,32 @@ EXPORT_SYMBOL(cond_set_guest_storage_key); int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; + unsigned long paddr; pgste_t old, new; + pmd_t *pmdp; pte_t *ptep; int cc = 0; - ptep = get_locked_pte(mm, addr, &ptl); + pmdp = pmd_alloc_map(mm, addr); + if (unlikely(!pmdp)) + return -EFAULT; + + ptl = pmd_lock(mm, pmdp); + if (!pmd_present(*pmdp)) { + spin_unlock(ptl); + return -EFAULT; + } + + if (pmd_large(*pmdp)) { + paddr = pmd_val(*pmdp) & HPAGE_MASK; + paddr |= addr & ~HPAGE_MASK; + cc = page_reset_referenced(paddr); + spin_unlock(ptl); + return cc; + } + spin_unlock(ptl); + + ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -843,7 +889,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) pgste_val(new) &= ~PGSTE_GR_BIT; if (!(pte_val(*ptep) & _PAGE_INVALID)) { - cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); + paddr = pte_val(*ptep) & PAGE_MASK; + cc = page_reset_referenced(paddr); /* Merge real referenced bit into host-set */ pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; } @@ -862,18 +909,42 @@ EXPORT_SYMBOL(reset_guest_reference_bit); int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key) { + unsigned long paddr; spinlock_t *ptl; pgste_t pgste; + pmd_t *pmdp; pte_t *ptep; - ptep = get_locked_pte(mm, addr, &ptl); + pmdp = pmd_alloc_map(mm, addr); + if (unlikely(!pmdp)) + return -EFAULT; + + ptl = pmd_lock(mm, pmdp); + if (!pmd_present(*pmdp)) { + /* Not yet mapped memory has a zero key */ + spin_unlock(ptl); + *key = 0; + return 0; + } + + if (pmd_large(*pmdp)) { + paddr = pmd_val(*pmdp) & HPAGE_MASK; + paddr |= addr & ~HPAGE_MASK; + *key = page_get_storage_key(paddr); + spin_unlock(ptl); + return 0; + } + spin_unlock(ptl); + + ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); if (unlikely(!ptep)) return -EFAULT; pgste = pgste_get_lock(ptep); *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + paddr = pte_val(*ptep) & PAGE_MASK; if (!(pte_val(*ptep) & _PAGE_INVALID)) - *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); + *key = page_get_storage_key(paddr); /* Reflect guest's logical view, not physical */ *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; pgste_set_unlock(ptep, pgste); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 5f0234ec8038..d7052cbe984f 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -485,8 +485,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) /* br %r1 */ _EMIT2(0x07f1); } else { - /* larl %r1,.+14 */ - EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); /* ex 0,S390_lowcore.br_r1_tampoline */ EMIT4_DISP(0x44000000, REG_0, REG_0, offsetof(struct lowcore, br_r1_trampoline)); diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 06a80434cfe6..5bd374491f94 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -134,6 +134,8 @@ void __init numa_setup(void) { pr_info("NUMA mode: %s\n", mode->name); nodes_clear(node_possible_map); + /* Initially attach all possible CPUs to node 0. */ + cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); if (mode->setup) mode->setup(); numa_setup_memory(); @@ -141,20 +143,6 @@ void __init numa_setup(void) } /* - * numa_init_early() - Initialization initcall - * - * This runs when only one CPU is online and before the first - * topology update is called for by the scheduler. - */ -static int __init numa_init_early(void) -{ - /* Attach all possible CPUs to node 0 for now. */ - cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); - return 0; -} -early_initcall(numa_init_early); - -/* * numa_init_late() - Initialization initcall * * Register NUMA nodes. diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index b482e95b6249..57f7cdac70a3 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -48,6 +48,10 @@ static char *pci_fmt2_names[] = { "Maximum work units", }; +static char *pci_fmt3_names[] = { + "Transmitted bytes", +}; + static char *pci_sw_names[] = { "Allocated pages", "Mapped pages", @@ -112,6 +116,10 @@ static int pci_perf_show(struct seq_file *m, void *v) pci_fmb_show(m, pci_fmt2_names, ARRAY_SIZE(pci_fmt2_names), &zdev->fmb->fmt2.consumed_work_units); break; + case 3: + pci_fmb_show(m, pci_fmt3_names, ARRAY_SIZE(pci_fmt3_names), + &zdev->fmb->fmt3.tx_bytes); + break; default: seq_puts(m, "Unknown format\n"); } diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 1ace023cbdce..8d61218a71aa 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -7,13 +7,13 @@ purgatory-y := head.o purgatory.o string.o sha256.o mem.o targets += $(purgatory-y) purgatory.ro kexec-purgatory.c PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) -$(obj)/sha256.o: $(srctree)/lib/sha256.c +$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE $(call if_changed_rule,cc_o_c) -$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S +$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -$(obj)/string.o: $(srctree)/arch/s390/lib/string.c +$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE $(call if_changed_rule,cc_o_c) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib @@ -21,8 +21,9 @@ LDFLAGS_purgatory.ro += -z nodefaultlib KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float +KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common KBUILD_CFLAGS += $(call cc-option,-fno-PIE) +KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S index 660c96a05a9b..2e3707b12edd 100644 --- a/arch/s390/purgatory/head.S +++ b/arch/s390/purgatory/head.S @@ -243,33 +243,26 @@ gprregs: .quad 0 .endr -purgatory_sha256_digest: - .global purgatory_sha256_digest - .rept 32 /* SHA256_DIGEST_SIZE */ - .byte 0 - .endr - -purgatory_sha_regions: - .global purgatory_sha_regions - .rept 16 * __KEXEC_SHA_REGION_SIZE /* KEXEC_SEGMENTS_MAX */ - .byte 0 - .endr - -kernel_entry: - .global kernel_entry - .quad 0 - -kernel_type: - .global kernel_type - .quad 0 - -crash_start: - .global crash_start - .quad 0 +/* Macro to define a global variable with name and size (in bytes) to be + * shared with C code. + * + * Add the .size and .type attribute to satisfy checks on the Elf_Sym during + * purgatory load. + */ +.macro GLOBAL_VARIABLE name,size +\name: + .global \name + .size \name,\size + .type \name,object + .skip \size,0 +.endm -crash_size: - .global crash_size - .quad 0 +GLOBAL_VARIABLE purgatory_sha256_digest,32 +GLOBAL_VARIABLE purgatory_sha_regions,16*__KEXEC_SHA_REGION_SIZE +GLOBAL_VARIABLE kernel_entry,8 +GLOBAL_VARIABLE kernel_type,8 +GLOBAL_VARIABLE crash_start,8 +GLOBAL_VARIABLE crash_size,8 .align PAGE_SIZE stack: diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c index 4e2beb3c29b7..3528e6da4e87 100644 --- a/arch/s390/purgatory/purgatory.c +++ b/arch/s390/purgatory/purgatory.c @@ -12,15 +12,6 @@ #include <linux/string.h> #include <asm/purgatory.h> -struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; -u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; - -u64 kernel_entry; -u64 kernel_type; - -u64 crash_start; -u64 crash_size; - int verify_sha256_digest(void) { struct kexec_sha_region *ptr, *end; diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss index d92f2d94a5d9..9bba2c14e0ca 100644 --- a/arch/s390/scripts/Makefile.chkbss +++ b/arch/s390/scripts/Makefile.chkbss @@ -2,13 +2,22 @@ quiet_cmd_chkbss = CHKBSS $< define cmd_chkbss + rm -f $@; \ if ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \ echo "error: $< .bss section is not empty" >&2; exit 1; \ fi; \ touch $@; endef -$(obj)/built-in.a: $(patsubst %, $(obj)/%.chkbss, $(chkbss)) +chkbss-target ?= $(obj)/built-in.a +ifneq (,$(findstring /,$(chkbss))) +chkbss-files := $(patsubst %, %.chkbss, $(chkbss)) +else +chkbss-files := $(patsubst %, $(obj)/%.chkbss, $(chkbss)) +endif + +$(chkbss-target): $(chkbss-files) +targets += $(notdir $(chkbss-files)) %.o.chkbss: %.o $(call cmd,chkbss) diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c index 259aa0680d1a..a1bc02b29c81 100644 --- a/arch/s390/tools/gen_opcode_table.c +++ b/arch/s390/tools/gen_opcode_table.c @@ -257,7 +257,7 @@ static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset) if (!desc->group) exit(EXIT_FAILURE); group = &desc->group[desc->nr_groups - 1]; - strncpy(group->opcode, insn->opcode, 2); + memcpy(group->opcode, insn->opcode, 2); group->type = insn->type; group->offset = offset; group->count = 1; @@ -283,7 +283,7 @@ static void print_opcode_table(struct gen_opcode *desc) continue; add_to_group(desc, insn, offset); if (strncmp(opcode, insn->opcode, 2)) { - strncpy(opcode, insn->opcode, 2); + memcpy(opcode, insn->opcode, 2); printf("\t/* %.2s */ \\\n", opcode); } print_opcode(insn, offset); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a9f60d0ee02e..a23e7d394a0a 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -73,8 +73,8 @@ static int dasd_alloc_queue(struct dasd_block *); static void dasd_setup_queue(struct dasd_block *); static void dasd_free_queue(struct dasd_block *); static int dasd_flush_block_queue(struct dasd_block *); -static void dasd_device_tasklet(struct dasd_device *); -static void dasd_block_tasklet(struct dasd_block *); +static void dasd_device_tasklet(unsigned long); +static void dasd_block_tasklet(unsigned long); static void do_kick_device(struct work_struct *); static void do_restore_device(struct work_struct *); static void do_reload_device(struct work_struct *); @@ -125,8 +125,7 @@ struct dasd_device *dasd_alloc_device(void) dasd_init_chunklist(&device->erp_chunks, device->erp_mem, PAGE_SIZE); spin_lock_init(&device->mem_lock); atomic_set(&device->tasklet_scheduled, 0); - tasklet_init(&device->tasklet, - (void (*)(unsigned long)) dasd_device_tasklet, + tasklet_init(&device->tasklet, dasd_device_tasklet, (unsigned long) device); INIT_LIST_HEAD(&device->ccw_queue); timer_setup(&device->timer, dasd_device_timeout, 0); @@ -166,8 +165,7 @@ struct dasd_block *dasd_alloc_block(void) atomic_set(&block->open_count, -1); atomic_set(&block->tasklet_scheduled, 0); - tasklet_init(&block->tasklet, - (void (*)(unsigned long)) dasd_block_tasklet, + tasklet_init(&block->tasklet, dasd_block_tasklet, (unsigned long) block); INIT_LIST_HEAD(&block->ccw_queue); spin_lock_init(&block->queue_lock); @@ -2064,8 +2062,9 @@ EXPORT_SYMBOL_GPL(dasd_flush_device_queue); /* * Acquire the device lock and process queues for the device. */ -static void dasd_device_tasklet(struct dasd_device *device) +static void dasd_device_tasklet(unsigned long data) { + struct dasd_device *device = (struct dasd_device *) data; struct list_head final_queue; atomic_set (&device->tasklet_scheduled, 0); @@ -2783,8 +2782,9 @@ static void __dasd_block_start_head(struct dasd_block *block) * block layer request queue, creates ccw requests, enqueues them on * a dasd_device and processes ccw requests that have been returned. */ -static void dasd_block_tasklet(struct dasd_block *block) +static void dasd_block_tasklet(unsigned long data) { + struct dasd_block *block = (struct dasd_block *) data; struct list_head final_queue; struct list_head *l, *n; struct dasd_ccw_req *cqr; @@ -3127,6 +3127,7 @@ static int dasd_alloc_queue(struct dasd_block *block) block->tag_set.nr_hw_queues = nr_hw_queues; block->tag_set.queue_depth = queue_depth; block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + block->tag_set.numa_node = NUMA_NO_NODE; rc = blk_mq_alloc_tag_set(&block->tag_set); if (rc) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index e36a114354fc..b9ce93e9df89 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -708,7 +708,7 @@ static int reset_summary_unit_check(struct alias_lcu *lcu, struct ccw1 *ccw; cqr = lcu->rsu_cqr; - strncpy((char *) &cqr->magic, "ECKD", 4); + memcpy((char *) &cqr->magic, "ECKD", 4); ASCEBC((char *) &cqr->magic, 4); ccw = cqr->cpaddr; ccw->cmd_code = DASD_ECKD_CCW_RSCK; diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index b9ebb565ee2c..fab35c6170cc 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -426,7 +426,7 @@ dasd_add_busid(const char *bus_id, int features) if (!devmap) { /* This bus_id is new. */ new->devindex = dasd_max_devindex++; - strncpy(new->bus_id, bus_id, DASD_BUS_ID_SIZE); + strlcpy(new->bus_id, bus_id, DASD_BUS_ID_SIZE); new->features = features; new->device = NULL; list_add(&new->list, &dasd_hashlists[hash]); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index bbf95b78ef5d..4e7b55a14b1a 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1780,6 +1780,9 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device) struct dasd_eckd_private *private = device->private; int i; + if (!private) + return; + dasd_alias_disconnect_device_from_lcu(device); private->ned = NULL; private->sneq = NULL; @@ -2035,8 +2038,11 @@ static int dasd_eckd_basic_to_ready(struct dasd_device *device) static int dasd_eckd_online_to_ready(struct dasd_device *device) { - cancel_work_sync(&device->reload_device); - cancel_work_sync(&device->kick_validate); + if (cancel_work_sync(&device->reload_device)) + dasd_put_device(device); + if (cancel_work_sync(&device->kick_validate)) + dasd_put_device(device); + return 0; }; @@ -3535,7 +3541,7 @@ static int prepare_itcw(struct itcw *itcw, dcw = itcw_add_dcw(itcw, pfx_cmd, 0, &pfxdata, sizeof(pfxdata), total_data_size); - return PTR_RET(dcw); + return PTR_ERR_OR_ZERO(dcw); } static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index 6ef8714dc693..93bb09da7fdc 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -313,7 +313,7 @@ static void dasd_eer_write_standard_trigger(struct dasd_device *device, ktime_get_real_ts64(&ts); header.tv_sec = ts.tv_sec; header.tv_usec = ts.tv_nsec / NSEC_PER_USEC; - strncpy(header.busid, dev_name(&device->cdev->dev), + strlcpy(header.busid, dev_name(&device->cdev->dev), DASD_EER_BUSID_SIZE); spin_lock_irqsave(&bufferlock, flags); @@ -356,7 +356,7 @@ static void dasd_eer_write_snss_trigger(struct dasd_device *device, ktime_get_real_ts64(&ts); header.tv_sec = ts.tv_sec; header.tv_usec = ts.tv_nsec / NSEC_PER_USEC; - strncpy(header.busid, dev_name(&device->cdev->dev), + strlcpy(header.busid, dev_name(&device->cdev->dev), DASD_EER_BUSID_SIZE); spin_lock_irqsave(&bufferlock, flags); diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index b1fcb76dd272..98f66b7b6794 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -455,6 +455,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) bdev->tag_set.nr_hw_queues = nr_requests; bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests; bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + bdev->tag_set.numa_node = NUMA_NO_NODE; ret = blk_mq_alloc_tag_set(&bdev->tag_set); if (ret) diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 0a4c13e1e76e..c6ab34f94b1b 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -12,11 +12,6 @@ GCOV_PROFILE_sclp_early_core.o := n KCOV_INSTRUMENT_sclp_early_core.o := n UBSAN_SANITIZE_sclp_early_core.o := n -ifneq ($(CC_FLAGS_MARCH),-march=z900) -CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) -CFLAGS_sclp_early_core.o += -march=z900 -endif - CFLAGS_sclp_early_core.o += -D__NO_FORTIFY CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_EXPOLINE) diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index 79eb60958015..bbb3001b0961 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -334,37 +334,41 @@ do_kdsk_ioctl(struct kbd_data *kbd, struct kbentry __user *user_kbe, int cmd, int perm) { struct kbentry tmp; + unsigned long kb_index, kb_table; ushort *key_map, val, ov; if (copy_from_user(&tmp, user_kbe, sizeof(struct kbentry))) return -EFAULT; + kb_index = (unsigned long) tmp.kb_index; #if NR_KEYS < 256 - if (tmp.kb_index >= NR_KEYS) + if (kb_index >= NR_KEYS) return -EINVAL; #endif + kb_table = (unsigned long) tmp.kb_table; #if MAX_NR_KEYMAPS < 256 - if (tmp.kb_table >= MAX_NR_KEYMAPS) + if (kb_table >= MAX_NR_KEYMAPS) return -EINVAL; + kb_table = array_index_nospec(kb_table , MAX_NR_KEYMAPS); #endif switch (cmd) { case KDGKBENT: - key_map = kbd->key_maps[tmp.kb_table]; + key_map = kbd->key_maps[kb_table]; if (key_map) { - val = U(key_map[tmp.kb_index]); + val = U(key_map[kb_index]); if (KTYP(val) >= KBD_NR_TYPES) val = K_HOLE; } else - val = (tmp.kb_index ? K_HOLE : K_NOSUCHMAP); + val = (kb_index ? K_HOLE : K_NOSUCHMAP); return put_user(val, &user_kbe->kb_value); case KDSKBENT: if (!perm) return -EPERM; - if (!tmp.kb_index && tmp.kb_value == K_NOSUCHMAP) { + if (!kb_index && tmp.kb_value == K_NOSUCHMAP) { /* disallocate map */ - key_map = kbd->key_maps[tmp.kb_table]; + key_map = kbd->key_maps[kb_table]; if (key_map) { - kbd->key_maps[tmp.kb_table] = NULL; + kbd->key_maps[kb_table] = NULL; kfree(key_map); } break; @@ -375,18 +379,18 @@ do_kdsk_ioctl(struct kbd_data *kbd, struct kbentry __user *user_kbe, if (KVAL(tmp.kb_value) > kbd_max_vals[KTYP(tmp.kb_value)]) return -EINVAL; - if (!(key_map = kbd->key_maps[tmp.kb_table])) { + if (!(key_map = kbd->key_maps[kb_table])) { int j; key_map = kmalloc(sizeof(plain_map), GFP_KERNEL); if (!key_map) return -ENOMEM; - kbd->key_maps[tmp.kb_table] = key_map; + kbd->key_maps[kb_table] = key_map; for (j = 0; j < NR_KEYS; j++) key_map[j] = U(K_HOLE); } - ov = U(key_map[tmp.kb_index]); + ov = U(key_map[kb_index]); if (tmp.kb_value == ov) break; /* nothing to do */ /* @@ -395,7 +399,7 @@ do_kdsk_ioctl(struct kbd_data *kbd, struct kbentry __user *user_kbe, if (((ov == K_SAK) || (tmp.kb_value == K_SAK)) && !capable(CAP_SYS_ADMIN)) return -EPERM; - key_map[tmp.kb_index] = U(tmp.kb_value); + key_map[kb_index] = U(tmp.kb_value); break; } return 0; diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index 76c158c41510..4f1a69c9d81d 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -61,7 +61,7 @@ static int monwrite_diag(struct monwrite_hdr *myhdr, char *buffer, int fcn) struct appldata_product_id id; int rc; - strncpy(id.prod_nr, "LNXAPPL", 7); + memcpy(id.prod_nr, "LNXAPPL", 7); id.prod_fn = myhdr->applid; id.record_nr = myhdr->record_num; id.version_nr = myhdr->version; diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c index ee6f3b563728..e69b12a40636 100644 --- a/drivers/s390/char/sclp_async.c +++ b/drivers/s390/char/sclp_async.c @@ -64,42 +64,18 @@ static struct notifier_block call_home_panic_nb = { .priority = INT_MAX, }; -static int proc_handler_callhome(struct ctl_table *ctl, int write, - void __user *buffer, size_t *count, - loff_t *ppos) -{ - unsigned long val; - int len, rc; - char buf[3]; - - if (!*count || (*ppos && !write)) { - *count = 0; - return 0; - } - if (!write) { - len = snprintf(buf, sizeof(buf), "%d\n", callhome_enabled); - rc = copy_to_user(buffer, buf, sizeof(buf)); - if (rc != 0) - return -EFAULT; - } else { - len = *count; - rc = kstrtoul_from_user(buffer, len, 0, &val); - if (rc) - return rc; - if (val != 0 && val != 1) - return -EINVAL; - callhome_enabled = val; - } - *count = len; - *ppos += len; - return 0; -} +static int zero; +static int one = 1; static struct ctl_table callhome_table[] = { { .procname = "callhome", + .data = &callhome_enabled, + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_handler_callhome, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, {} }; diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index 37e65a05517f..cdcde18e7220 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -113,16 +113,16 @@ static int crypt_enabled(struct tape_device *device) static void ext_to_int_kekl(struct tape390_kekl *in, struct tape3592_kekl *out) { - int i; + int len; memset(out, 0, sizeof(*out)); if (in->type == TAPE390_KEKL_TYPE_HASH) out->flags |= 0x40; if (in->type_on_tape == TAPE390_KEKL_TYPE_HASH) out->flags |= 0x80; - strncpy(out->label, in->label, 64); - for (i = strlen(in->label); i < sizeof(out->label); i++) - out->label[i] = ' '; + len = min(sizeof(out->label), strlen(in->label)); + memcpy(out->label, in->label, len); + memset(out->label + len, ' ', sizeof(out->label) - len); ASCEBC(out->label, sizeof(out->label)); } diff --git a/drivers/s390/char/tape_class.c b/drivers/s390/char/tape_class.c index a07102472ce9..b58df0dd0039 100644 --- a/drivers/s390/char/tape_class.c +++ b/drivers/s390/char/tape_class.c @@ -54,10 +54,10 @@ struct tape_class_device *register_tape_dev( if (!tcd) return ERR_PTR(-ENOMEM); - strncpy(tcd->device_name, device_name, TAPECLASS_NAME_LEN); + strlcpy(tcd->device_name, device_name, TAPECLASS_NAME_LEN); for (s = strchr(tcd->device_name, '/'); s; s = strchr(s, '/')) *s = '!'; - strncpy(tcd->mode_name, mode_name, TAPECLASS_NAME_LEN); + strlcpy(tcd->mode_name, mode_name, TAPECLASS_NAME_LEN); for (s = strchr(tcd->mode_name, '/'); s; s = strchr(s, '/')) *s = '!'; @@ -77,7 +77,7 @@ struct tape_class_device *register_tape_dev( tcd->class_device = device_create(tape_class, device, tcd->char_device->dev, NULL, "%s", tcd->device_name); - rc = PTR_RET(tcd->class_device); + rc = PTR_ERR_OR_ZERO(tcd->class_device); if (rc) goto fail_with_cdev; rc = sysfs_create_link( diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index afbdee74147d..51038ec309c1 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -471,14 +471,17 @@ int chp_new(struct chp_id chpid) { struct channel_subsystem *css = css_by_id(chpid.cssid); struct channel_path *chp; - int ret; + int ret = 0; + mutex_lock(&css->mutex); if (chp_is_registered(chpid)) - return 0; - chp = kzalloc(sizeof(struct channel_path), GFP_KERNEL); - if (!chp) - return -ENOMEM; + goto out; + chp = kzalloc(sizeof(struct channel_path), GFP_KERNEL); + if (!chp) { + ret = -ENOMEM; + goto out; + } /* fill in status, etc. */ chp->chpid = chpid; chp->state = 1; @@ -505,21 +508,20 @@ int chp_new(struct chp_id chpid) put_device(&chp->dev); goto out; } - mutex_lock(&css->mutex); + if (css->cm_enabled) { ret = chp_add_cmg_attr(chp); if (ret) { device_unregister(&chp->dev); - mutex_unlock(&css->mutex); goto out; } } css->chps[chpid.id] = chp; - mutex_unlock(&css->mutex); goto out; out_free: kfree(chp); out: + mutex_unlock(&css->mutex); return ret; } @@ -585,8 +587,7 @@ static void chp_process_crw(struct crw *crw0, struct crw *crw1, switch (crw0->erc) { case CRW_ERC_IPARM: /* Path has come. */ case CRW_ERC_INIT: - if (!chp_is_registered(chpid)) - chp_new(chpid); + chp_new(chpid); chsc_chp_online(chpid); break; case CRW_ERC_PERRI: /* Path has gone. */ diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 9029804dcd22..a0baee25134c 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -91,7 +91,7 @@ struct chsc_ssd_area { u16 sch; /* subchannel */ u8 chpid[8]; /* chpids 0-7 */ u16 fla[8]; /* full link addresses 0-7 */ -} __attribute__ ((packed)); +} __packed __aligned(PAGE_SIZE); int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd) { @@ -319,7 +319,7 @@ struct chsc_sei { struct chsc_sei_nt2_area nt2_area; u8 nt_area[PAGE_SIZE - 24]; } u; -} __packed; +} __packed __aligned(PAGE_SIZE); /* * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands" @@ -841,7 +841,7 @@ int __chsc_do_secm(struct channel_subsystem *css, int enable) u32 : 4; u32 fmt : 4; u32 : 16; - } __attribute__ ((packed)) *secm_area; + } *secm_area; unsigned long flags; int ret, ccode; @@ -1014,7 +1014,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp) u32 cmg : 8; u32 zeroes3; u32 data[NR_MEASUREMENT_CHARS]; - } __attribute__ ((packed)) *scmc_area; + } *scmc_area; chp->shared = -1; chp->cmg = -1; @@ -1142,7 +1142,7 @@ int __init chsc_get_cssid(int idx) u8 cssid; u32 : 24; } list[0]; - } __packed *sdcal_area; + } *sdcal_area; int ret; spin_lock_irq(&chsc_page_lock); @@ -1192,7 +1192,7 @@ chsc_determine_css_characteristics(void) u32 reserved4; u32 general_char[510]; u32 chsc_char[508]; - } __attribute__ ((packed)) *scsc_area; + } *scsc_area; spin_lock_irqsave(&chsc_page_lock, flags); memset(chsc_page, 0, PAGE_SIZE); @@ -1236,7 +1236,7 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta) unsigned int rsvd3[3]; u64 clock_delta; unsigned int rsvd4[2]; - } __attribute__ ((packed)) *rr; + } *rr; int rc; memset(page, 0, PAGE_SIZE); @@ -1261,7 +1261,7 @@ int chsc_sstpi(void *page, void *result, size_t size) unsigned int rsvd0[3]; struct chsc_header response; char data[]; - } __attribute__ ((packed)) *rr; + } *rr; int rc; memset(page, 0, PAGE_SIZE); @@ -1284,7 +1284,7 @@ int chsc_siosl(struct subchannel_id schid) u32 word3; struct chsc_header response; u32 word[11]; - } __attribute__ ((packed)) *siosl_area; + } *siosl_area; unsigned long flags; int ccode; int rc; diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 5c9f0dd33f4e..78aba8d94eec 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -15,12 +15,12 @@ #define NR_MEASUREMENT_CHARS 5 struct cmg_chars { u32 values[NR_MEASUREMENT_CHARS]; -} __attribute__ ((packed)); +}; #define NR_MEASUREMENT_ENTRIES 8 struct cmg_entry { u32 values[NR_MEASUREMENT_ENTRIES]; -} __attribute__ ((packed)); +}; struct channel_path_desc_fmt1 { u8 flags; @@ -38,7 +38,7 @@ struct channel_path_desc_fmt1 { u8 s:1; u8 f:1; u32 zeros[2]; -} __attribute__ ((packed)); +}; struct channel_path_desc_fmt3 { struct channel_path_desc_fmt1 fmt1_desc; @@ -59,7 +59,7 @@ struct css_chsc_char { u32:7; u32 pnso:1; /* bit 116 */ u32:11; -}__attribute__((packed)); +} __packed; extern struct css_chsc_char css_chsc_characteristics; @@ -82,7 +82,7 @@ struct chsc_ssqd_area { struct chsc_header response; u32:32; struct qdio_ssqd_desc qdio_ssqd; -} __packed; +} __packed __aligned(PAGE_SIZE); struct chsc_scssc_area { struct chsc_header request; @@ -102,7 +102,7 @@ struct chsc_scssc_area { u32 reserved[1004]; struct chsc_header response; u32:32; -} __packed; +} __packed __aligned(PAGE_SIZE); struct chsc_scpd { struct chsc_header request; @@ -120,7 +120,7 @@ struct chsc_scpd { struct chsc_header response; u32:32; u8 data[0]; -} __packed; +} __packed __aligned(PAGE_SIZE); struct chsc_sda_area { struct chsc_header request; @@ -199,7 +199,7 @@ struct chsc_scm_info { u32 reserved2[10]; u64 restok; struct sale scmal[248]; -} __packed; +} __packed __aligned(PAGE_SIZE); int chsc_scm_info(struct chsc_scm_info *scm_area, u64 token); @@ -243,7 +243,7 @@ struct chsc_pnso_area { struct qdio_brinfo_entry_l3_ipv4 l3_ipv4[0]; struct qdio_brinfo_entry_l2 l2[0]; } entries; -} __packed; +} __packed __aligned(PAGE_SIZE); int chsc_pnso_brinfo(struct subchannel_id schid, struct chsc_pnso_area *brinfo_area, diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 5130d7c67239..de744ca158fd 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -526,76 +526,6 @@ int cio_disable_subchannel(struct subchannel *sch) } EXPORT_SYMBOL_GPL(cio_disable_subchannel); -static int cio_check_devno_blacklisted(struct subchannel *sch) -{ - if (is_blacklisted(sch->schid.ssid, sch->schib.pmcw.dev)) { - /* - * This device must not be known to Linux. So we simply - * say that there is no device and return ENODEV. - */ - CIO_MSG_EVENT(6, "Blacklisted device detected " - "at devno %04X, subchannel set %x\n", - sch->schib.pmcw.dev, sch->schid.ssid); - return -ENODEV; - } - return 0; -} - -/** - * cio_validate_subchannel - basic validation of subchannel - * @sch: subchannel structure to be filled out - * @schid: subchannel id - * - * Find out subchannel type and initialize struct subchannel. - * Return codes: - * 0 on success - * -ENXIO for non-defined subchannels - * -ENODEV for invalid subchannels or blacklisted devices - * -EIO for subchannels in an invalid subchannel set - */ -int cio_validate_subchannel(struct subchannel *sch, struct subchannel_id schid) -{ - char dbf_txt[15]; - int ccode; - int err; - - sprintf(dbf_txt, "valsch%x", schid.sch_no); - CIO_TRACE_EVENT(4, dbf_txt); - - /* - * The first subchannel that is not-operational (ccode==3) - * indicates that there aren't any more devices available. - * If stsch gets an exception, it means the current subchannel set - * is not valid. - */ - ccode = stsch(schid, &sch->schib); - if (ccode) { - err = (ccode == 3) ? -ENXIO : ccode; - goto out; - } - sch->st = sch->schib.pmcw.st; - sch->schid = schid; - - switch (sch->st) { - case SUBCHANNEL_TYPE_IO: - case SUBCHANNEL_TYPE_MSG: - if (!css_sch_is_valid(&sch->schib)) - err = -ENODEV; - else - err = cio_check_devno_blacklisted(sch); - break; - default: - err = 0; - } - if (err) - goto out; - - CIO_MSG_EVENT(4, "Subchannel 0.%x.%04x reports subchannel type %04X\n", - sch->schid.ssid, sch->schid.sch_no, sch->st); -out: - return err; -} - /* * do_cio_interrupt() handles all normal I/O device IRQ's */ @@ -719,6 +649,7 @@ struct subchannel *cio_probe_console(void) { struct subchannel_id schid; struct subchannel *sch; + struct schib schib; int sch_no, ret; sch_no = cio_get_console_sch_no(); @@ -728,7 +659,11 @@ struct subchannel *cio_probe_console(void) } init_subchannel_id(&schid); schid.sch_no = sch_no; - sch = css_alloc_subchannel(schid); + ret = stsch(schid, &schib); + if (ret) + return ERR_PTR(-ENODEV); + + sch = css_alloc_subchannel(schid, &schib); if (IS_ERR(sch)) return sch; diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index 94cd813bdcfe..9811fd8a0c73 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -119,7 +119,6 @@ DECLARE_PER_CPU(struct irb, cio_irb); #define to_subchannel(n) container_of(n, struct subchannel, dev) -extern int cio_validate_subchannel (struct subchannel *, struct subchannel_id); extern int cio_enable_subchannel(struct subchannel *, u32); extern int cio_disable_subchannel (struct subchannel *); extern int cio_cancel (struct subchannel *); diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 9263a0fb3858..aea502922646 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -25,6 +25,7 @@ #include "css.h" #include "cio.h" +#include "blacklist.h" #include "cio_debug.h" #include "ioasm.h" #include "chsc.h" @@ -168,18 +169,53 @@ static void css_subchannel_release(struct device *dev) kfree(sch); } -struct subchannel *css_alloc_subchannel(struct subchannel_id schid) +static int css_validate_subchannel(struct subchannel_id schid, + struct schib *schib) +{ + int err; + + switch (schib->pmcw.st) { + case SUBCHANNEL_TYPE_IO: + case SUBCHANNEL_TYPE_MSG: + if (!css_sch_is_valid(schib)) + err = -ENODEV; + else if (is_blacklisted(schid.ssid, schib->pmcw.dev)) { + CIO_MSG_EVENT(6, "Blacklisted device detected " + "at devno %04X, subchannel set %x\n", + schib->pmcw.dev, schid.ssid); + err = -ENODEV; + } else + err = 0; + break; + default: + err = 0; + } + if (err) + goto out; + + CIO_MSG_EVENT(4, "Subchannel 0.%x.%04x reports subchannel type %04X\n", + schid.ssid, schid.sch_no, schib->pmcw.st); +out: + return err; +} + +struct subchannel *css_alloc_subchannel(struct subchannel_id schid, + struct schib *schib) { struct subchannel *sch; int ret; + ret = css_validate_subchannel(schid, schib); + if (ret < 0) + return ERR_PTR(ret); + sch = kzalloc(sizeof(*sch), GFP_KERNEL | GFP_DMA); if (!sch) return ERR_PTR(-ENOMEM); - ret = cio_validate_subchannel(sch, schid); - if (ret < 0) - goto err; + sch->schid = schid; + sch->schib = *schib; + sch->st = schib->pmcw.st; ret = css_sch_create_locks(sch); if (ret) @@ -244,8 +280,7 @@ static void ssd_register_chpids(struct chsc_ssd_info *ssd) for (i = 0; i < 8; i++) { mask = 0x80 >> i; if (ssd->path_mask & mask) - if (!chp_is_registered(ssd->chpid[i])) - chp_new(ssd->chpid[i]); + chp_new(ssd->chpid[i]); } } @@ -382,12 +417,12 @@ int css_register_subchannel(struct subchannel *sch) return ret; } -static int css_probe_device(struct subchannel_id schid) +static int css_probe_device(struct subchannel_id schid, struct schib *schib) { struct subchannel *sch; int ret; - sch = css_alloc_subchannel(schid); + sch = css_alloc_subchannel(schid, schib); if (IS_ERR(sch)) return PTR_ERR(sch); @@ -436,23 +471,23 @@ EXPORT_SYMBOL_GPL(css_sch_is_valid); static int css_evaluate_new_subchannel(struct subchannel_id schid, int slow) { struct schib schib; + int ccode; if (!slow) { /* Will be done on the slow path. */ return -EAGAIN; } - if (stsch(schid, &schib)) { - /* Subchannel is not provided. */ - return -ENXIO; - } - if (!css_sch_is_valid(&schib)) { - /* Unusable - ignore. */ - return 0; - } - CIO_MSG_EVENT(4, "event: sch 0.%x.%04x, new\n", schid.ssid, - schid.sch_no); + /* + * The first subchannel that is not-operational (ccode==3) + * indicates that there aren't any more devices available. + * If stsch gets an exception, it means the current subchannel set + * is not valid. + */ + ccode = stsch(schid, &schib); + if (ccode) + return (ccode == 3) ? -ENXIO : ccode; - return css_probe_device(schid); + return css_probe_device(schid, &schib); } static int css_evaluate_known_subchannel(struct subchannel *sch, int slow) @@ -1081,6 +1116,11 @@ static int __init channel_subsystem_init(void) if (ret) goto out_wq; + /* Register subchannels which are already in use. */ + cio_register_early_subchannels(); + /* Start initial subchannel evaluation. */ + css_schedule_eval_all(); + return ret; out_wq: destroy_workqueue(cio_work_q); @@ -1120,10 +1160,6 @@ int css_complete_work(void) */ static int __init channel_subsystem_init_sync(void) { - /* Register subchannels which are already in use. */ - cio_register_early_subchannels(); - /* Start initial subchannel evaluation. */ - css_schedule_eval_all(); css_complete_work(); return 0; } diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 30357cbf350a..8d832900a63d 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -103,7 +103,8 @@ extern void css_driver_unregister(struct css_driver *); extern void css_sch_device_unregister(struct subchannel *); extern int css_register_subchannel(struct subchannel *); -extern struct subchannel *css_alloc_subchannel(struct subchannel_id); +extern struct subchannel *css_alloc_subchannel(struct subchannel_id, + struct schib *schib); extern struct subchannel *get_subchannel_by_schid(struct subchannel_id); extern int css_init_done; extern int max_ssid; diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index f4ca72dd862f..9c7d9da42ba0 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -631,21 +631,20 @@ static inline unsigned long qdio_aob_for_buffer(struct qdio_output_q *q, unsigned long phys_aob = 0; if (!q->use_cq) - goto out; + return 0; if (!q->aobs[bufnr]) { struct qaob *aob = qdio_allocate_aob(); q->aobs[bufnr] = aob; } if (q->aobs[bufnr]) { - q->sbal_state[bufnr].flags = QDIO_OUTBUF_STATE_FLAG_NONE; q->sbal_state[bufnr].aob = q->aobs[bufnr]; q->aobs[bufnr]->user1 = (u64) q->sbal_state[bufnr].user; phys_aob = virt_to_phys(q->aobs[bufnr]); WARN_ON_ONCE(phys_aob & 0xFF); } -out: + q->sbal_state[bufnr].flags = 0; return phys_aob; } diff --git a/drivers/s390/cio/trace.h b/drivers/s390/cio/trace.h index 1f8d1c1e566d..0ebb29b6fd6d 100644 --- a/drivers/s390/cio/trace.h +++ b/drivers/s390/cio/trace.h @@ -30,6 +30,17 @@ DECLARE_EVENT_CLASS(s390_class_schib, __field(u16, schno) __field(u16, devno) __field_struct(struct schib, schib) + __field(u8, pmcw_ena) + __field(u8, pmcw_st) + __field(u8, pmcw_dnv) + __field(u16, pmcw_dev) + __field(u8, pmcw_lpm) + __field(u8, pmcw_pnom) + __field(u8, pmcw_lpum) + __field(u8, pmcw_pim) + __field(u8, pmcw_pam) + __field(u8, pmcw_pom) + __field(u64, pmcw_chpid) __field(int, cc) ), TP_fast_assign( @@ -38,18 +49,29 @@ DECLARE_EVENT_CLASS(s390_class_schib, __entry->schno = schid.sch_no; __entry->devno = schib->pmcw.dev; __entry->schib = *schib; + __entry->pmcw_ena = schib->pmcw.ena; + __entry->pmcw_st = schib->pmcw.ena; + __entry->pmcw_dnv = schib->pmcw.dnv; + __entry->pmcw_dev = schib->pmcw.dev; + __entry->pmcw_lpm = schib->pmcw.lpm; + __entry->pmcw_pnom = schib->pmcw.pnom; + __entry->pmcw_lpum = schib->pmcw.lpum; + __entry->pmcw_pim = schib->pmcw.pim; + __entry->pmcw_pam = schib->pmcw.pam; + __entry->pmcw_pom = schib->pmcw.pom; + memcpy(&__entry->pmcw_chpid, &schib->pmcw.chpid, 8); __entry->cc = cc; ), TP_printk("schid=%x.%x.%04x cc=%d ena=%d st=%d dnv=%d dev=%04x " "lpm=0x%02x pnom=0x%02x lpum=0x%02x pim=0x%02x pam=0x%02x " "pom=0x%02x chpids=%016llx", __entry->cssid, __entry->ssid, __entry->schno, __entry->cc, - __entry->schib.pmcw.ena, __entry->schib.pmcw.st, - __entry->schib.pmcw.dnv, __entry->schib.pmcw.dev, - __entry->schib.pmcw.lpm, __entry->schib.pmcw.pnom, - __entry->schib.pmcw.lpum, __entry->schib.pmcw.pim, - __entry->schib.pmcw.pam, __entry->schib.pmcw.pom, - *((u64 *) __entry->schib.pmcw.chpid) + __entry->pmcw_ena, __entry->pmcw_st, + __entry->pmcw_dnv, __entry->pmcw_dev, + __entry->pmcw_lpm, __entry->pmcw_pnom, + __entry->pmcw_lpum, __entry->pmcw_pim, + __entry->pmcw_pam, __entry->pmcw_pom, + __entry->pmcw_chpid ) ); @@ -89,6 +111,13 @@ TRACE_EVENT(s390_cio_tsch, __field(u8, ssid) __field(u16, schno) __field_struct(struct irb, irb) + __field(u8, scsw_dcc) + __field(u8, scsw_pno) + __field(u8, scsw_fctl) + __field(u8, scsw_actl) + __field(u8, scsw_stctl) + __field(u8, scsw_dstat) + __field(u8, scsw_cstat) __field(int, cc) ), TP_fast_assign( @@ -96,15 +125,22 @@ TRACE_EVENT(s390_cio_tsch, __entry->ssid = schid.ssid; __entry->schno = schid.sch_no; __entry->irb = *irb; + __entry->scsw_dcc = scsw_cc(&irb->scsw); + __entry->scsw_pno = scsw_pno(&irb->scsw); + __entry->scsw_fctl = scsw_fctl(&irb->scsw); + __entry->scsw_actl = scsw_actl(&irb->scsw); + __entry->scsw_stctl = scsw_stctl(&irb->scsw); + __entry->scsw_dstat = scsw_dstat(&irb->scsw); + __entry->scsw_cstat = scsw_cstat(&irb->scsw); __entry->cc = cc; ), TP_printk("schid=%x.%x.%04x cc=%d dcc=%d pno=%d fctl=0x%x actl=0x%x " "stctl=0x%x dstat=0x%x cstat=0x%x", __entry->cssid, __entry->ssid, __entry->schno, __entry->cc, - scsw_cc(&__entry->irb.scsw), scsw_pno(&__entry->irb.scsw), - scsw_fctl(&__entry->irb.scsw), scsw_actl(&__entry->irb.scsw), - scsw_stctl(&__entry->irb.scsw), - scsw_dstat(&__entry->irb.scsw), scsw_cstat(&__entry->irb.scsw) + __entry->scsw_dcc, __entry->scsw_pno, + __entry->scsw_fctl, __entry->scsw_actl, + __entry->scsw_stctl, + __entry->scsw_dstat, __entry->scsw_cstat ) ); @@ -122,6 +158,9 @@ TRACE_EVENT(s390_cio_tpi, __field(u8, cssid) __field(u8, ssid) __field(u16, schno) + __field(u8, adapter_IO) + __field(u8, isc) + __field(u8, type) ), TP_fast_assign( __entry->cc = cc; @@ -136,11 +175,14 @@ TRACE_EVENT(s390_cio_tpi, __entry->cssid = __entry->tpi_info.schid.cssid; __entry->ssid = __entry->tpi_info.schid.ssid; __entry->schno = __entry->tpi_info.schid.sch_no; + __entry->adapter_IO = __entry->tpi_info.adapter_IO; + __entry->isc = __entry->tpi_info.isc; + __entry->type = __entry->tpi_info.type; ), TP_printk("schid=%x.%x.%04x cc=%d a=%d isc=%d type=%d", __entry->cssid, __entry->ssid, __entry->schno, __entry->cc, - __entry->tpi_info.adapter_IO, __entry->tpi_info.isc, - __entry->tpi_info.type + __entry->adapter_IO, __entry->isc, + __entry->type ) ); @@ -299,16 +341,20 @@ TRACE_EVENT(s390_cio_interrupt, __field(u8, cssid) __field(u8, ssid) __field(u16, schno) + __field(u8, isc) + __field(u8, type) ), TP_fast_assign( __entry->tpi_info = *tpi_info; - __entry->cssid = __entry->tpi_info.schid.cssid; - __entry->ssid = __entry->tpi_info.schid.ssid; - __entry->schno = __entry->tpi_info.schid.sch_no; + __entry->cssid = tpi_info->schid.cssid; + __entry->ssid = tpi_info->schid.ssid; + __entry->schno = tpi_info->schid.sch_no; + __entry->isc = tpi_info->isc; + __entry->type = tpi_info->type; ), TP_printk("schid=%x.%x.%04x isc=%d type=%d", __entry->cssid, __entry->ssid, __entry->schno, - __entry->tpi_info.isc, __entry->tpi_info.type + __entry->isc, __entry->type ) ); @@ -321,11 +367,13 @@ TRACE_EVENT(s390_cio_adapter_int, TP_ARGS(tpi_info), TP_STRUCT__entry( __field_struct(struct tpi_info, tpi_info) + __field(u8, isc) ), TP_fast_assign( __entry->tpi_info = *tpi_info; + __entry->isc = tpi_info->isc; ), - TP_printk("isc=%d", __entry->tpi_info.isc) + TP_printk("isc=%d", __entry->isc) ); /** @@ -339,16 +387,30 @@ TRACE_EVENT(s390_cio_stcrw, TP_STRUCT__entry( __field_struct(struct crw, crw) __field(int, cc) + __field(u8, slct) + __field(u8, oflw) + __field(u8, chn) + __field(u8, rsc) + __field(u8, anc) + __field(u8, erc) + __field(u16, rsid) ), TP_fast_assign( __entry->crw = *crw; __entry->cc = cc; + __entry->slct = crw->slct; + __entry->oflw = crw->oflw; + __entry->chn = crw->chn; + __entry->rsc = crw->rsc; + __entry->anc = crw->anc; + __entry->erc = crw->erc; + __entry->rsid = crw->rsid; ), TP_printk("cc=%d slct=%d oflw=%d chn=%d rsc=%d anc=%d erc=0x%x " "rsid=0x%x", - __entry->cc, __entry->crw.slct, __entry->crw.oflw, - __entry->crw.chn, __entry->crw.rsc, __entry->crw.anc, - __entry->crw.erc, __entry->crw.rsid + __entry->cc, __entry->slct, __entry->oflw, + __entry->chn, __entry->rsc, __entry->anc, + __entry->erc, __entry->rsid ) ); diff --git a/drivers/s390/crypto/ap_asm.h b/drivers/s390/crypto/ap_asm.h deleted file mode 100644 index 16b59ce5e01d..000000000000 --- a/drivers/s390/crypto/ap_asm.h +++ /dev/null @@ -1,236 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright IBM Corp. 2016 - * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> - * - * Adjunct processor bus inline assemblies. - */ - -#ifndef _AP_ASM_H_ -#define _AP_ASM_H_ - -#include <asm/isc.h> - -/** - * ap_intructions_available() - Test if AP instructions are available. - * - * Returns 0 if the AP instructions are installed. - */ -static inline int ap_instructions_available(void) -{ - register unsigned long reg0 asm ("0") = AP_MKQID(0, 0); - register unsigned long reg1 asm ("1") = -ENODEV; - register unsigned long reg2 asm ("2") = 0UL; - - asm volatile( - " .long 0xb2af0000\n" /* PQAP(TAPQ) */ - "0: la %1,0\n" - "1:\n" - EX_TABLE(0b, 1b) - : "+d" (reg0), "+d" (reg1), "+d" (reg2) : : "cc"); - return reg1; -} - -/** - * ap_tapq(): Test adjunct processor queue. - * @qid: The AP queue number - * @info: Pointer to queue descriptor - * - * Returns AP queue status structure. - */ -static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) -{ - register unsigned long reg0 asm ("0") = qid; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2") = 0UL; - - asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ - : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); - if (info) - *info = reg2; - return reg1; -} - -/** - * ap_pqap_rapq(): Reset adjunct processor queue. - * @qid: The AP queue number - * - * Returns AP queue status structure. - */ -static inline struct ap_queue_status ap_rapq(ap_qid_t qid) -{ - register unsigned long reg0 asm ("0") = qid | 0x01000000UL; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2") = 0UL; - - asm volatile( - ".long 0xb2af0000" /* PQAP(RAPQ) */ - : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); - return reg1; -} - -/** - * ap_aqic(): Control interruption for a specific AP. - * @qid: The AP queue number - * @qirqctrl: struct ap_qirq_ctrl (64 bit value) - * @ind: The notification indicator byte - * - * Returns AP queue status. - */ -static inline struct ap_queue_status ap_aqic(ap_qid_t qid, - struct ap_qirq_ctrl qirqctrl, - void *ind) -{ - register unsigned long reg0 asm ("0") = qid | (3UL << 24); - register struct ap_qirq_ctrl reg1_in asm ("1") = qirqctrl; - register struct ap_queue_status reg1_out asm ("1"); - register void *reg2 asm ("2") = ind; - - asm volatile( - ".long 0xb2af0000" /* PQAP(AQIC) */ - : "+d" (reg0), "+d" (reg1_in), "=d" (reg1_out), "+d" (reg2) - : - : "cc"); - return reg1_out; -} - -/** - * ap_qci(): Get AP configuration data - * - * Returns 0 on success, or -EOPNOTSUPP. - */ -static inline int ap_qci(void *config) -{ - register unsigned long reg0 asm ("0") = 0x04000000UL; - register unsigned long reg1 asm ("1") = -EINVAL; - register void *reg2 asm ("2") = (void *) config; - - asm volatile( - ".long 0xb2af0000\n" /* PQAP(QCI) */ - "0: la %1,0\n" - "1:\n" - EX_TABLE(0b, 1b) - : "+d" (reg0), "+d" (reg1), "+d" (reg2) - : - : "cc", "memory"); - - return reg1; -} - -/* - * union ap_qact_ap_info - used together with the - * ap_aqic() function to provide a convenient way - * to handle the ap info needed by the qact function. - */ -union ap_qact_ap_info { - unsigned long val; - struct { - unsigned int : 3; - unsigned int mode : 3; - unsigned int : 26; - unsigned int cat : 8; - unsigned int : 8; - unsigned char ver[2]; - }; -}; - -/** - * ap_qact(): Query AP combatibility type. - * @qid: The AP queue number - * @apinfo: On input the info about the AP queue. On output the - * alternate AP queue info provided by the qact function - * in GR2 is stored in. - * - * Returns AP queue status. Check response_code field for failures. - */ -static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, - union ap_qact_ap_info *apinfo) -{ - register unsigned long reg0 asm ("0") = qid | (5UL << 24) - | ((ifbit & 0x01) << 22); - register unsigned long reg1_in asm ("1") = apinfo->val; - register struct ap_queue_status reg1_out asm ("1"); - register unsigned long reg2 asm ("2") = 0; - - asm volatile( - ".long 0xb2af0000" /* PQAP(QACT) */ - : "+d" (reg0), "+d" (reg1_in), "=d" (reg1_out), "+d" (reg2) - : : "cc"); - apinfo->val = reg2; - return reg1_out; -} - -/** - * ap_nqap(): Send message to adjunct processor queue. - * @qid: The AP queue number - * @psmid: The program supplied message identifier - * @msg: The message text - * @length: The message length - * - * Returns AP queue status structure. - * Condition code 1 on NQAP can't happen because the L bit is 1. - * Condition code 2 on NQAP also means the send is incomplete, - * because a segment boundary was reached. The NQAP is repeated. - */ -static inline struct ap_queue_status ap_nqap(ap_qid_t qid, - unsigned long long psmid, - void *msg, size_t length) -{ - register unsigned long reg0 asm ("0") = qid | 0x40000000UL; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2") = (unsigned long) msg; - register unsigned long reg3 asm ("3") = (unsigned long) length; - register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); - register unsigned long reg5 asm ("5") = psmid & 0xffffffff; - - asm volatile ( - "0: .long 0xb2ad0042\n" /* NQAP */ - " brc 2,0b" - : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) - : "d" (reg4), "d" (reg5) - : "cc", "memory"); - return reg1; -} - -/** - * ap_dqap(): Receive message from adjunct processor queue. - * @qid: The AP queue number - * @psmid: Pointer to program supplied message identifier - * @msg: The message text - * @length: The message length - * - * Returns AP queue status structure. - * Condition code 1 on DQAP means the receive has taken place - * but only partially. The response is incomplete, hence the - * DQAP is repeated. - * Condition code 2 on DQAP also means the receive is incomplete, - * this time because a segment boundary was reached. Again, the - * DQAP is repeated. - * Note that gpr2 is used by the DQAP instruction to keep track of - * any 'residual' length, in case the instruction gets interrupted. - * Hence it gets zeroed before the instruction. - */ -static inline struct ap_queue_status ap_dqap(ap_qid_t qid, - unsigned long long *psmid, - void *msg, size_t length) -{ - register unsigned long reg0 asm("0") = qid | 0x80000000UL; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm("2") = 0UL; - register unsigned long reg4 asm("4") = (unsigned long) msg; - register unsigned long reg5 asm("5") = (unsigned long) length; - register unsigned long reg6 asm("6") = 0UL; - register unsigned long reg7 asm("7") = 0UL; - - - asm volatile( - "0: .long 0xb2ae0064\n" /* DQAP */ - " brc 6,0b\n" - : "+d" (reg0), "=d" (reg1), "+d" (reg2), - "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7) - : : "cc", "memory"); - *psmid = (((unsigned long long) reg6) << 32) + reg7; - return reg1; -} - -#endif /* _AP_ASM_H_ */ diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 35a0c2b52f82..bf27fc4d1335 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -36,7 +36,6 @@ #include <linux/debugfs.h> #include "ap_bus.h" -#include "ap_asm.h" #include "ap_debug.h" /* @@ -174,24 +173,6 @@ static inline int ap_qact_available(void) return 0; } -/** - * ap_test_queue(): Test adjunct processor queue. - * @qid: The AP queue number - * @tbit: Test facilities bit - * @info: Pointer to queue descriptor - * - * Returns AP queue status structure. - */ -struct ap_queue_status ap_test_queue(ap_qid_t qid, - int tbit, - unsigned long *info) -{ - if (tbit) - qid |= 1UL << 23; /* set T bit*/ - return ap_tapq(qid, info); -} -EXPORT_SYMBOL(ap_test_queue); - /* * ap_query_configuration(): Fetch cryptographic config info * @@ -200,7 +181,7 @@ EXPORT_SYMBOL(ap_test_queue); * is returned, e.g. if the PQAP(QCI) instruction is not * available, the return value will be -EOPNOTSUPP. */ -int ap_query_configuration(struct ap_config_info *info) +static inline int ap_query_configuration(struct ap_config_info *info) { if (!ap_configuration_available()) return -EOPNOTSUPP; @@ -493,7 +474,7 @@ static int ap_poll_thread_start(void) return 0; mutex_lock(&ap_poll_thread_mutex); ap_poll_kthread = kthread_run(ap_poll_thread, NULL, "appoll"); - rc = PTR_RET(ap_poll_kthread); + rc = PTR_ERR_OR_ZERO(ap_poll_kthread); if (rc) ap_poll_kthread = NULL; mutex_unlock(&ap_poll_thread_mutex); @@ -1261,7 +1242,7 @@ static int __init ap_module_init(void) /* Create /sys/devices/ap. */ ap_root_device = root_device_register("ap"); - rc = PTR_RET(ap_root_device); + rc = PTR_ERR_OR_ZERO(ap_root_device); if (rc) goto out_bus; diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 6a273c5ebca5..936541937e15 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -15,6 +15,7 @@ #include <linux/device.h> #include <linux/types.h> +#include <asm/isc.h> #include <asm/ap.h> #define AP_DEVICES 256 /* Number of AP devices. */ diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c index 2c726df210f6..c13e43292cb7 100644 --- a/drivers/s390/crypto/ap_card.c +++ b/drivers/s390/crypto/ap_card.c @@ -14,7 +14,6 @@ #include <asm/facility.h> #include "ap_bus.h" -#include "ap_asm.h" /* * AP card related attributes. diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index ba3a2e13b0eb..e365171fe28f 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -14,26 +14,6 @@ #include <asm/facility.h> #include "ap_bus.h" -#include "ap_asm.h" - -/** - * ap_queue_irq_ctrl(): Control interruption on a AP queue. - * @qirqctrl: struct ap_qirq_ctrl (64 bit value) - * @ind: The notification indicator byte - * - * Returns AP queue status. - * - * Control interruption on the given AP queue. - * Just a simple wrapper function for the low level PQAP(AQIC) - * instruction available for other kernel modules. - */ -struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid, - struct ap_qirq_ctrl qirqctrl, - void *ind) -{ - return ap_aqic(qid, qirqctrl, ind); -} -EXPORT_SYMBOL(ap_queue_irq_ctrl); /** * ap_queue_enable_interruption(): Enable interruption on an AP queue. diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 3929c8be8098..e663432395c1 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -699,7 +699,7 @@ static int query_crypto_facility(u16 cardnr, u16 domain, /* fill request cprb param block with FQ request */ preqparm = (struct fqreqparm *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "FQ", 2); - strncpy(preqparm->rule_array, keyword, sizeof(preqparm->rule_array)); + memcpy(preqparm->rule_array, keyword, sizeof(preqparm->rule_array)); preqparm->rule_array_len = sizeof(preqparm->rule_array_len) + sizeof(preqparm->rule_array); preqparm->lv1.len = sizeof(preqparm->lv1); diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index 233e1e695208..da2c8dfd4d74 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -83,9 +83,21 @@ static ssize_t zcrypt_card_online_store(struct device *dev, static DEVICE_ATTR(online, 0644, zcrypt_card_online_show, zcrypt_card_online_store); +static ssize_t zcrypt_card_load_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct zcrypt_card *zc = to_ap_card(dev)->private; + + return snprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&zc->load)); +} + +static DEVICE_ATTR(load, 0444, zcrypt_card_load_show, NULL); + static struct attribute *zcrypt_card_attrs[] = { &dev_attr_type.attr, &dev_attr_online.attr, + &dev_attr_load.attr, NULL, }; diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h index 011d61d8a4ae..1752622b95f7 100644 --- a/drivers/s390/crypto/zcrypt_cca_key.h +++ b/drivers/s390/crypto/zcrypt_cca_key.h @@ -99,7 +99,7 @@ struct cca_pvt_ext_CRT_sec { * @mex: pointer to user input data * @p: pointer to memory area for the key * - * Returns the size of the key area or -EFAULT + * Returns the size of the key area or negative errno value. */ static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, void *p) { @@ -118,6 +118,15 @@ static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, void *p) unsigned char *temp; int i; + /* + * The inputdatalength was a selection criteria in the dispatching + * function zcrypt_rsa_modexpo(). However, do a plausibility check + * here to make sure the following copy_from_user() can't be utilized + * to compromise the system. + */ + if (WARN_ON_ONCE(mex->inputdatalength > 512)) + return -EINVAL; + memset(key, 0, sizeof(*key)); key->pubHdr = static_pub_hdr; @@ -178,6 +187,15 @@ static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt, void *p) struct cca_public_sec *pub; int short_len, long_len, pad_len, key_len, size; + /* + * The inputdatalength was a selection criteria in the dispatching + * function zcrypt_rsa_crt(). However, do a plausibility check + * here to make sure the following copy_from_user() can't be utilized + * to compromise the system. + */ + if (WARN_ON_ONCE(crt->inputdatalength > 512)) + return -EINVAL; + memset(key, 0, sizeof(*key)); short_len = (crt->inputdatalength + 1) / 2; diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 97d4bacbc442..e70ae078c86b 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -246,7 +246,7 @@ int speed_idx_ep11(int req_type) * @ap_msg: pointer to AP message * @mex: pointer to user input data * - * Returns 0 on success or -EFAULT. + * Returns 0 on success or negative errno value. */ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq, struct ap_message *ap_msg, @@ -272,6 +272,14 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq, } __packed * msg = ap_msg->message; int size; + /* + * The inputdatalength was a selection criteria in the dispatching + * function zcrypt_rsa_modexpo(). However, make sure the following + * copy_from_user() never exceeds the allocated buffer space. + */ + if (WARN_ON_ONCE(mex->inputdatalength > PAGE_SIZE)) + return -EINVAL; + /* VUD.ciphertext */ msg->length = mex->inputdatalength + 2; if (copy_from_user(msg->text, mex->inputdata, mex->inputdatalength)) @@ -307,7 +315,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq, * @ap_msg: pointer to AP message * @crt: pointer to user input data * - * Returns 0 on success or -EFAULT. + * Returns 0 on success or negative errno value. */ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq, struct ap_message *ap_msg, @@ -334,6 +342,14 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq, } __packed * msg = ap_msg->message; int size; + /* + * The inputdatalength was a selection criteria in the dispatching + * function zcrypt_rsa_crt(). However, make sure the following + * copy_from_user() never exceeds the allocated buffer space. + */ + if (WARN_ON_ONCE(crt->inputdatalength > PAGE_SIZE)) + return -EINVAL; + /* VUD.ciphertext */ msg->length = crt->inputdatalength + 2; if (copy_from_user(msg->text, crt->inputdata, crt->inputdatalength)) diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 720434e18007..91a52f268353 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -75,8 +75,20 @@ static ssize_t zcrypt_queue_online_store(struct device *dev, static DEVICE_ATTR(online, 0644, zcrypt_queue_online_show, zcrypt_queue_online_store); +static ssize_t zcrypt_queue_load_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct zcrypt_queue *zq = to_ap_queue(dev)->private; + + return snprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&zq->load)); +} + +static DEVICE_ATTR(load, 0444, zcrypt_queue_load_show, NULL); + static struct attribute *zcrypt_queue_attrs[] = { &dev_attr_online.attr, + &dev_attr_load.attr, NULL, }; diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index a3a8c8d9d717..94f4d8fe85e0 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -101,7 +101,7 @@ static void __init zfcp_init_device_setup(char *devstr) token = strsep(&str, ","); if (!token || strlen(token) >= ZFCP_BUS_ID_SIZE) goto err_out; - strncpy(busid, token, ZFCP_BUS_ID_SIZE); + strlcpy(busid, token, ZFCP_BUS_ID_SIZE); token = strsep(&str, ","); if (!token || kstrtoull(token, 0, (unsigned long long *) &wwpn)) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b6270a3b38e9..b955b986b341 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_GET_MSR_FEATURES 153 #define KVM_CAP_HYPERV_EVENTFD 154 #define KVM_CAP_HYPERV_TLBFLUSH 155 +#define KVM_CAP_S390_HPAGE_1M 156 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/init/Kconfig b/init/Kconfig index 041f3a022122..8b1ab81ecda8 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -125,10 +125,13 @@ config HAVE_KERNEL_LZO config HAVE_KERNEL_LZ4 bool +config HAVE_KERNEL_UNCOMPRESSED + bool + choice prompt "Kernel compression mode" default KERNEL_GZIP - depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 + depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_UNCOMPRESSED help The linux kernel is a kind of self-extracting executable. Several compression algorithms are available, which differ @@ -207,6 +210,16 @@ config KERNEL_LZ4 is about 8% bigger than LZO. But the decompression speed is faster than LZO. +config KERNEL_UNCOMPRESSED + bool "None" + depends on HAVE_KERNEL_UNCOMPRESSED + help + Produce uncompressed kernel image. This option is usually not what + you want. It is useful for debugging the kernel in slow simulation + environments, where decompressing and moving the kernel is awfully + slow. This option allows early boot code to skip the decompressor + and jump right at uncompressed kernel image. + endchoice config DEFAULT_HOSTNAME diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index 140fa8bb5c23..914ebe98fc21 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -55,22 +55,24 @@ static inline void XOR(int x, int y, int z) asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); } -static inline void LOAD_DATA(int x, int n, u8 *ptr) +static inline void LOAD_DATA(int x, u8 *ptr) { - typedef struct { u8 _[16*n]; } addrtype; + typedef struct { u8 _[16 * $#]; } addrtype; register addrtype *__ptr asm("1") = (addrtype *) ptr; asm volatile ("VLM %2,%3,0,%r1" - : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1)); + : : "m" (*__ptr), "a" (__ptr), "i" (x), + "i" (x + $# - 1)); } -static inline void STORE_DATA(int x, int n, u8 *ptr) +static inline void STORE_DATA(int x, u8 *ptr) { - typedef struct { u8 _[16*n]; } addrtype; + typedef struct { u8 _[16 * $#]; } addrtype; register addrtype *__ptr asm("1") = (addrtype *) ptr; asm volatile ("VSTM %2,%3,0,1" - : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1)); + : "=m" (*__ptr) : "a" (__ptr), "i" (x), + "i" (x + $# - 1)); } static inline void COPY_VEC(int x, int y) @@ -93,19 +95,19 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) q = dptr[z0 + 2]; /* RS syndrome */ for (d = 0; d < bytes; d += $#*NSIZE) { - LOAD_DATA(0,$#,&dptr[z0][d]); + LOAD_DATA(0,&dptr[z0][d]); COPY_VEC(8+$$,0+$$); for (z = z0 - 1; z >= 0; z--) { MASK(16+$$,8+$$); AND(16+$$,16+$$,25); SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); - LOAD_DATA(16,$#,&dptr[z][d]); + LOAD_DATA(16,&dptr[z][d]); XOR(0+$$,0+$$,16+$$); XOR(8+$$,8+$$,16+$$); } - STORE_DATA(0,$#,&p[d]); - STORE_DATA(8,$#,&q[d]); + STORE_DATA(0,&p[d]); + STORE_DATA(8,&q[d]); } kernel_fpu_end(&vxstate, KERNEL_VXR); } @@ -127,14 +129,14 @@ static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, for (d = 0; d < bytes; d += $#*NSIZE) { /* P/Q data pages */ - LOAD_DATA(0,$#,&dptr[z0][d]); + LOAD_DATA(0,&dptr[z0][d]); COPY_VEC(8+$$,0+$$); for (z = z0 - 1; z >= start; z--) { MASK(16+$$,8+$$); AND(16+$$,16+$$,25); SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); - LOAD_DATA(16,$#,&dptr[z][d]); + LOAD_DATA(16,&dptr[z][d]); XOR(0+$$,0+$$,16+$$); XOR(8+$$,8+$$,16+$$); } @@ -145,12 +147,12 @@ static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); } - LOAD_DATA(16,$#,&p[d]); + LOAD_DATA(16,&p[d]); XOR(16+$$,16+$$,0+$$); - STORE_DATA(16,$#,&p[d]); - LOAD_DATA(16,$#,&q[d]); + STORE_DATA(16,&p[d]); + LOAD_DATA(16,&q[d]); XOR(16+$$,16+$$,8+$$); - STORE_DATA(16,$#,&q[d]); + STORE_DATA(16,&q[d]); } kernel_fpu_end(&vxstate, KERNEL_VXR); } diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 615252331813..91cf50b2b0e5 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -90,6 +90,30 @@ unsigned int yield_mod_cnt, nr_abort; #error "Unsupported architecture" #endif +#elif defined(__s390__) + +#define RSEQ_INJECT_INPUT \ + , [loop_cnt_1]"m"(loop_cnt[1]) \ + , [loop_cnt_2]"m"(loop_cnt[2]) \ + , [loop_cnt_3]"m"(loop_cnt[3]) \ + , [loop_cnt_4]"m"(loop_cnt[4]) \ + , [loop_cnt_5]"m"(loop_cnt[5]) \ + , [loop_cnt_6]"m"(loop_cnt[6]) + +#define INJECT_ASM_REG "r12" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG + +#define RSEQ_INJECT_ASM(n) \ + "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ + "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \ + "je 333f\n\t" \ + "222:\n\t" \ + "ahi %%" INJECT_ASM_REG ", -1\n\t" \ + "jnz 222b\n\t" \ + "333:\n\t" + #elif defined(__ARMEL__) #define RSEQ_INJECT_INPUT \ diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h new file mode 100644 index 000000000000..1069e85258ce --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -0,0 +1,513 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ + +#define RSEQ_SIG 0x53053053 + +#define rseq_smp_mb() __asm__ __volatile__ ("bcr 15,0" ::: "memory") +#define rseq_smp_rmb() rseq_smp_mb() +#define rseq_smp_wmb() rseq_smp_mb() + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_barrier(); \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +#define rseq_smp_store_release(p, v) \ +do { \ + rseq_barrier(); \ + RSEQ_WRITE_ONCE(*p, v); \ +} while (0) + +#ifdef RSEQ_SKIP_FASTPATH +#include "rseq-skip.h" +#else /* !RSEQ_SKIP_FASTPATH */ + +#ifdef __s390x__ + +#define LONG_L "lg" +#define LONG_S "stg" +#define LONG_LT_R "ltgr" +#define LONG_CMP "cg" +#define LONG_CMP_R "cgr" +#define LONG_ADDI "aghi" +#define LONG_ADD_R "agr" + +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + __rseq_str(label) ":\n\t" \ + ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + ".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \ + ".popsection\n\t" + +#elif __s390__ + +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + __rseq_str(label) ":\n\t" \ + ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \ + ".popsection\n\t" + +#define LONG_L "l" +#define LONG_S "st" +#define LONG_LT_R "ltr" +#define LONG_CMP "c" +#define LONG_CMP_R "cr" +#define LONG_ADDI "ahi" +#define LONG_ADD_R "ar" + +#endif + +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "larl %%r0, " __rseq_str(cs_label) "\n\t" \ + LONG_S " %%r0, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + "c %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \ + "jnz " __rseq_str(label) "\n\t" + +#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + ".long " __rseq_str(RSEQ_SIG) "\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "j %l[" __rseq_str(abort_label) "]\n\t" \ + ".popsection\n\t" + +#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "j %l[" __rseq_str(cmpfail_label) "]\n\t" \ + ".popsection\n\t" + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[error2]\n\t" +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +/* + * Compare @v against @expectnot. When it does _not_ match, load @v + * into @load, and store the content of *@v + voffp into @v. + */ +static inline __attribute__((always_inline)) +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " %%r1, %[v]\n\t" + LONG_CMP_R " %%r1, %[expectnot]\n\t" + "je %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " %%r1, %[v]\n\t" + LONG_CMP_R " %%r1, %[expectnot]\n\t" + "je %l[error2]\n\t" +#endif + LONG_S " %%r1, %[load]\n\t" + LONG_ADD_R " %%r1, %[voffp]\n\t" + LONG_L " %%r1, 0(%%r1)\n\t" + /* final store */ + LONG_S " %%r1, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "r" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0", "r1" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + LONG_L " %%r0, %[v]\n\t" + LONG_ADD_R " %%r0, %[count]\n\t" + /* final store */ + LONG_S " %%r0, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[error2]\n\t" +#endif + /* try store */ + LONG_S " %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +/* s390 is TSO. */ +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu); +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) + LONG_CMP " %[expect2], %[v2]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[error2]\n\t" + LONG_CMP " %[expect2], %[v2]\n\t" + "jnz %l[error3]\n\t" +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("1st expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uint64_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + LONG_S " %[src], %[rseq_scratch0]\n\t" + LONG_S " %[dst], %[rseq_scratch1]\n\t" + LONG_S " %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_CMP " %[expect], %[v]\n\t" + "jnz 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + LONG_CMP " %[expect], %[v]\n\t" + "jnz 7f\n\t" +#endif + /* try memcpy */ + LONG_LT_R " %[len], %[len]\n\t" + "jz 333f\n\t" + "222:\n\t" + "ic %%r0,0(%[src])\n\t" + "stc %%r0,0(%[dst])\n\t" + LONG_ADDI " %[src], 1\n\t" + LONG_ADDI " %[dst], 1\n\t" + LONG_ADDI " %[len], -1\n\t" + "jnz 222b\n\t" + "333:\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t" + RSEQ_ASM_DEFINE_ABORT(4, + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + abort) + RSEQ_ASM_DEFINE_CMPFAIL(5, + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error2) +#endif + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +/* s390 is TSO. */ +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len, + newv, cpu); +} +#endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 86ce22417e0d..ca332efe9713 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -75,6 +75,8 @@ extern __thread volatile struct rseq __rseq_abi; #include <rseq-ppc.h> #elif defined(__mips__) #include <rseq-mips.h> +#elif defined(__s390__) +#include <rseq-s390.h> #else #error unsupported target #endif |