summaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig34
-rw-r--r--arch/s390/Kconfig.debug12
-rw-r--r--arch/s390/Makefile9
-rw-r--r--arch/s390/configs/default_defconfig27
-rw-r--r--arch/s390/configs/gcov_defconfig24
-rw-r--r--arch/s390/configs/performance_defconfig24
-rw-r--r--arch/s390/configs/zfcpdump_defconfig10
-rw-r--r--arch/s390/crypto/aes_s390.c6
-rw-r--r--arch/s390/crypto/prng.c2
-rw-r--r--arch/s390/defconfig30
-rw-r--r--arch/s390/hypfs/inode.c12
-rw-r--r--arch/s390/include/asm/barrier.h23
-rw-r--r--arch/s390/include/asm/cache.h3
-rw-r--r--arch/s390/include/asm/checksum.h6
-rw-r--r--arch/s390/include/asm/cio.h1
-rw-r--r--arch/s390/include/asm/clp.h27
-rw-r--r--arch/s390/include/asm/compat.h2
-rw-r--r--arch/s390/include/asm/crw.h14
-rw-r--r--arch/s390/include/asm/device.h6
-rw-r--r--arch/s390/include/asm/dma-mapping.h8
-rw-r--r--arch/s390/include/asm/elf.h25
-rw-r--r--arch/s390/include/asm/facilities_src.h58
-rw-r--r--arch/s390/include/asm/facility.h17
-rw-r--r--arch/s390/include/asm/fpu/internal.h12
-rw-r--r--arch/s390/include/asm/gmap.h64
-rw-r--r--arch/s390/include/asm/ipl.h17
-rw-r--r--arch/s390/include/asm/irqflags.h9
-rw-r--r--arch/s390/include/asm/kvm_host.h101
-rw-r--r--arch/s390/include/asm/livepatch.h4
-rw-r--r--arch/s390/include/asm/lowcore.h27
-rw-r--r--arch/s390/include/asm/mmu_context.h28
-rw-r--r--arch/s390/include/asm/os_info.h2
-rw-r--r--arch/s390/include/asm/pci.h6
-rw-r--r--arch/s390/include/asm/pci_clp.h30
-rw-r--r--arch/s390/include/asm/pci_dma.h6
-rw-r--r--arch/s390/include/asm/pci_io.h14
-rw-r--r--arch/s390/include/asm/percpu.h1
-rw-r--r--arch/s390/include/asm/perf_event.h2
-rw-r--r--arch/s390/include/asm/pgalloc.h28
-rw-r--r--arch/s390/include/asm/pgtable.h648
-rw-r--r--arch/s390/include/asm/processor.h28
-rw-r--r--arch/s390/include/asm/ptrace.h44
-rw-r--r--arch/s390/include/asm/reset.h3
-rw-r--r--arch/s390/include/asm/rwsem.h2
-rw-r--r--arch/s390/include/asm/sclp.h21
-rw-r--r--arch/s390/include/asm/setup.h33
-rw-r--r--arch/s390/include/asm/smp.h2
-rw-r--r--arch/s390/include/asm/sysinfo.h17
-rw-r--r--arch/s390/include/asm/thread_info.h2
-rw-r--r--arch/s390/include/asm/topology.h6
-rw-r--r--arch/s390/include/asm/trace/diag.h6
-rw-r--r--arch/s390/include/asm/uaccess.h8
-rw-r--r--arch/s390/include/asm/vdso.h6
-rw-r--r--arch/s390/include/asm/xor.h21
-rw-r--r--arch/s390/include/uapi/asm/clp.h28
-rw-r--r--arch/s390/include/uapi/asm/kvm.h13
-rw-r--r--arch/s390/include/uapi/asm/sie.h1
-rw-r--r--arch/s390/include/uapi/asm/socket.h5
-rw-r--r--arch/s390/include/uapi/asm/unistd.h22
-rw-r--r--arch/s390/kernel/Makefile8
-rw-r--r--arch/s390/kernel/asm-offsets.c176
-rw-r--r--arch/s390/kernel/compat_signal.c2
-rw-r--r--arch/s390/kernel/compat_wrapper.c2
-rw-r--r--arch/s390/kernel/cpcmd.c3
-rw-r--r--arch/s390/kernel/crash_dump.c452
-rw-r--r--arch/s390/kernel/debug.c8
-rw-r--r--arch/s390/kernel/diag.c4
-rw-r--r--arch/s390/kernel/dis.c4
-rw-r--r--arch/s390/kernel/dumpstack.c102
-rw-r--r--arch/s390/kernel/early.c18
-rw-r--r--arch/s390/kernel/entry.S109
-rw-r--r--arch/s390/kernel/ftrace.c2
-rw-r--r--arch/s390/kernel/head.S142
-rw-r--r--arch/s390/kernel/head64.S2
-rw-r--r--arch/s390/kernel/ipl.c90
-rw-r--r--arch/s390/kernel/irq.c3
-rw-r--r--arch/s390/kernel/kprobes.c16
-rw-r--r--arch/s390/kernel/machine_kexec.c110
-rw-r--r--arch/s390/kernel/module.c22
-rw-r--r--arch/s390/kernel/os_info.c7
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c3
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c11
-rw-r--r--arch/s390/kernel/perf_event.c60
-rw-r--r--arch/s390/kernel/process.c18
-rw-r--r--arch/s390/kernel/processor.c6
-rw-r--r--arch/s390/kernel/ptrace.c6
-rw-r--r--arch/s390/kernel/reipl.S94
-rw-r--r--arch/s390/kernel/sclp.c67
-rw-r--r--arch/s390/kernel/setup.c61
-rw-r--r--arch/s390/kernel/signal.c13
-rw-r--r--arch/s390/kernel/smp.c161
-rw-r--r--arch/s390/kernel/stacktrace.c109
-rw-r--r--arch/s390/kernel/syscalls.S4
-rw-r--r--arch/s390/kernel/sysinfo.c20
-rw-r--r--arch/s390/kernel/time.c14
-rw-r--r--arch/s390/kernel/topology.c7
-rw-r--r--arch/s390/kernel/trace.c9
-rw-r--r--arch/s390/kernel/traps.c33
-rw-r--r--arch/s390/kernel/vdso.c17
-rw-r--r--arch/s390/kernel/vdso32/Makefile2
-rw-r--r--arch/s390/kernel/vdso32/getcpu.S43
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S1
-rw-r--r--arch/s390/kernel/vdso64/Makefile2
-rw-r--r--arch/s390/kernel/vdso64/getcpu.S42
-rw-r--r--arch/s390/kernel/vdso64/vdso64.lds.S1
-rw-r--r--arch/s390/kernel/vmlinux.lds.S1
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/kvm/Makefile2
-rw-r--r--arch/s390/kvm/diag.c12
-rw-r--r--arch/s390/kvm/gaccess.c95
-rw-r--r--arch/s390/kvm/gaccess.h38
-rw-r--r--arch/s390/kvm/guestdbg.c8
-rw-r--r--arch/s390/kvm/intercept.c85
-rw-r--r--arch/s390/kvm/interrupt.c242
-rw-r--r--arch/s390/kvm/kvm-s390.c661
-rw-r--r--arch/s390/kvm/kvm-s390.h35
-rw-r--r--arch/s390/kvm/priv.c20
-rw-r--r--arch/s390/kvm/sigp.c8
-rw-r--r--arch/s390/kvm/trace-s390.h6
-rw-r--r--arch/s390/lib/Makefile2
-rw-r--r--arch/s390/lib/spinlock.c45
-rw-r--r--arch/s390/lib/xor.c134
-rw-r--r--arch/s390/mm/Makefile6
-rw-r--r--arch/s390/mm/extable.c81
-rw-r--r--arch/s390/mm/extmem.c20
-rw-r--r--arch/s390/mm/fault.c24
-rw-r--r--arch/s390/mm/gmap.c774
-rw-r--r--arch/s390/mm/gup.c37
-rw-r--r--arch/s390/mm/hugetlbpage.c7
-rw-r--r--arch/s390/mm/init.c42
-rw-r--r--arch/s390/mm/maccess.c16
-rw-r--r--arch/s390/mm/mem_detect.c7
-rw-r--r--arch/s390/mm/mmap.c72
-rw-r--r--arch/s390/mm/pageattr.c8
-rw-r--r--arch/s390/mm/pgalloc.c360
-rw-r--r--arch/s390/mm/pgtable.c1547
-rw-r--r--arch/s390/mm/vmem.c10
-rw-r--r--arch/s390/net/bpf_jit_comp.c13
-rw-r--r--arch/s390/numa/numa.c6
-rw-r--r--arch/s390/oprofile/Makefile2
-rw-r--r--arch/s390/oprofile/backtrace.c77
-rw-r--r--arch/s390/oprofile/init.c21
-rw-r--r--arch/s390/pci/pci.c76
-rw-r--r--arch/s390/pci/pci_clp.c250
-rw-r--r--arch/s390/pci/pci_debug.c5
-rw-r--r--arch/s390/pci/pci_dma.c131
-rw-r--r--arch/s390/pci/pci_event.c18
-rw-r--r--arch/s390/tools/.gitignore1
-rw-r--r--arch/s390/tools/Makefile15
-rw-r--r--arch/s390/tools/gen_facilities.c67
150 files changed, 4788 insertions, 4033 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 3a55f493c7da..aad23e3dff2c 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -10,9 +10,6 @@ config LOCKDEP_SUPPORT
config STACKTRACE_SUPPORT
def_bool y
-config HAVE_LATENCYTOP_SUPPORT
- def_bool y
-
config RWSEM_GENERIC_SPINLOCK
bool
@@ -62,10 +59,14 @@ config PCI_QUIRKS
config ARCH_SUPPORTS_UPROBES
def_bool y
+config DEBUG_RODATA
+ def_bool y
+
config S390
def_bool y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_SG_CHAIN
@@ -126,6 +127,7 @@ config S390
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
select HAVE_DEBUG_KMEMLEAK
+ select HAVE_DMA_API_DEBUG
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_FTRACE_MCOUNT_RECORD
@@ -166,8 +168,7 @@ config SCHED_OMIT_FRAME_POINTER
config PGTABLE_LEVELS
int
- default 4 if 64BIT
- default 2
+ default 4
source "init/Kconfig"
@@ -257,12 +258,12 @@ config MARCH_ZEC12
older machines.
config MARCH_Z13
- bool "IBM z13"
+ bool "IBM z13s and z13"
select HAVE_MARCH_Z13_FEATURES
help
- Select this to enable optimizations for IBM z13 (2964 series).
- The kernel will be slightly faster but will not work on older
- machines.
+ Select this to enable optimizations for IBM z13s and z13 (2965 and
+ 2964 series). The kernel will be slightly faster but will not work on
+ older machines.
endchoice
@@ -390,9 +391,6 @@ config HOTPLUG_CPU
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
-config SCHED_SMT
- def_bool n
-
# Some NUMA nodes have memory ranges that span
# other nodes. Even though a pfn is valid and
# between a node's start and end pfns, it may not
@@ -403,7 +401,7 @@ config NODES_SPAN_OTHER_NODES
config NUMA
bool "NUMA support"
- depends on SMP && 64BIT && SCHED_TOPOLOGY
+ depends on SMP && SCHED_TOPOLOGY
default n
help
Enable NUMA support
@@ -463,6 +461,9 @@ config EMU_SIZE
endmenu
+config SCHED_SMT
+ def_bool n
+
config SCHED_MC
def_bool n
@@ -582,7 +583,6 @@ config QDIO
menuconfig PCI
bool "PCI support"
- select HAVE_DMA_ATTRS
select PCI_MSI
select IOMMU_SUPPORT
help
@@ -609,8 +609,6 @@ config PCI_NR_MSI
PCI devices.
source "drivers/pci/Kconfig"
-source "drivers/pci/pcie/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
endif # PCI
@@ -623,10 +621,6 @@ config HAS_IOMEM
config IOMMU_HELPER
def_bool PCI
-config HAS_DMA
- def_bool PCI
- select HAVE_DMA_API_DEBUG
-
config NEED_SG_DMA_LENGTH
def_bool PCI
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index c56878e1245f..26c5d5beb4be 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -5,18 +5,6 @@ config TRACE_IRQFLAGS_SUPPORT
source "lib/Kconfig.debug"
-config STRICT_DEVMEM
- def_bool y
- prompt "Filter access to /dev/mem"
- ---help---
- This option restricts access to /dev/mem. If this option is
- disabled, you allow userspace access to all memory, including
- kernel and userspace memory. Accidental memory access is likely
- to be disastrous.
- Memory access is required for experts who want to debug the kernel.
-
- If you are unsure, say Y.
-
config S390_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index e8d4423e4f85..224b42734f0d 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -106,6 +106,7 @@ drivers-y += drivers/s390/
drivers-$(CONFIG_OPROFILE) += arch/s390/oprofile/
boot := arch/s390/boot
+tools := arch/s390/tools
all: image bzImage
@@ -124,9 +125,17 @@ vdso_install:
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
+ $(Q)$(MAKE) $(clean)=$(tools)
+
+archprepare:
+ $(Q)$(MAKE) $(build)=$(tools) include/generated/facilities.h
# Don't use tabs in echo arguments
define archhelp
echo '* image - Kernel image for IPL ($(boot)/image)'
echo '* bzImage - Compressed kernel image for IPL ($(boot)/bzImage)'
+ echo ' install - Install kernel using'
+ echo ' (your) ~/bin/$(INSTALLKERNEL) or'
+ echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
+ echo ' install to $$(INSTALL_PATH)'
endef
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index ed7da281df66..0ac42cc4f880 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -10,28 +10,35 @@ CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_NUMA_BALANCING=y
CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_MEMCG_KMEM=y
+CONFIG_CGROUP_HUGETLB=y
CONFIG_CGROUP_PERF=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_BLK_CGROUP=y
CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
CONFIG_BPF_SYSCALL=y
+CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
+CONFIG_STATIC_KEYS_SELFTEST=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
@@ -64,7 +71,6 @@ CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_CHSC_SCH=y
CONFIG_CRASH_DUMP=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
CONFIG_NET=y
@@ -106,7 +112,6 @@ CONFIG_TCP_CONG_LP=m
CONFIG_TCP_CONG_VENO=m
CONFIG_TCP_CONG_YEAH=m
CONFIG_TCP_CONG_ILLINOIS=m
-CONFIG_IPV6=y
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
@@ -457,19 +462,9 @@ CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_VIRTIO_BALLOON=m
-# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
-CONFIG_JBD_DEBUG=y
CONFIG_JBD2_DEBUG=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
@@ -490,7 +485,7 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
CONFIG_AUTOFS4_FS=m
-CONFIG_FUSE_FS=m
+CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_FSCACHE=m
CONFIG_CACHEFILES=m
@@ -542,10 +537,11 @@ CONFIG_DLM=m
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DEBUG_INFO=y
-# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_FRAME_WARN=1024
CONFIG_READABLE_ASM=y
CONFIG_UNUSED_SYMBOLS=y
+CONFIG_HEADERS_CHECK=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_DEBUG_OBJECTS=y
@@ -588,6 +584,7 @@ CONFIG_FAILSLAB=y
CONFIG_FAIL_PAGE_ALLOC=y
CONFIG_FAIL_MAKE_REQUEST=y
CONFIG_FAIL_IO_TIMEOUT=y
+CONFIG_FAIL_FUTEX=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y
CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
CONFIG_LATENCYTOP=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index 9858b14cde1e..a31dcd56f7c0 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -10,21 +10,27 @@ CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_NUMA_BALANCING=y
CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_MEMCG_KMEM=y
+CONFIG_CGROUP_HUGETLB=y
CONFIG_CGROUP_PERF=y
CONFIG_BLK_CGROUP=y
CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
CONFIG_BPF_SYSCALL=y
+CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
@@ -61,7 +67,6 @@ CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_CHSC_SCH=y
CONFIG_CRASH_DUMP=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
CONFIG_NET=y
@@ -103,7 +108,6 @@ CONFIG_TCP_CONG_LP=m
CONFIG_TCP_CONG_VENO=m
CONFIG_TCP_CONG_YEAH=m
CONFIG_TCP_CONG_ILLINOIS=m
-CONFIG_IPV6=y
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
@@ -453,19 +457,9 @@ CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_VIRTIO_BALLOON=m
-# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
-CONFIG_JBD_DEBUG=y
CONFIG_JBD2_DEBUG=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
@@ -485,7 +479,7 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
CONFIG_AUTOFS4_FS=m
-CONFIG_FUSE_FS=m
+CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_FSCACHE=m
CONFIG_CACHEFILES=m
@@ -550,6 +544,7 @@ CONFIG_NOTIFIER_ERROR_INJECTION=m
CONFIG_CPU_NOTIFIER_ERROR_INJECT=m
CONFIG_PM_NOTIFIER_ERROR_INJECT=m
CONFIG_LATENCYTOP=y
+CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
CONFIG_BLK_DEV_IO_TRACE=y
# CONFIG_KPROBE_EVENT is not set
CONFIG_LKDTM=m
@@ -557,6 +552,7 @@ CONFIG_RBTREE_TEST=m
CONFIG_INTERVAL_TREE_TEST=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_TEST_BPF=m
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 7f14f80717d4..7b73bf353345 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -10,22 +10,28 @@ CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_NUMA_BALANCING=y
# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_MEMCG_KMEM=y
+CONFIG_CGROUP_HUGETLB=y
CONFIG_CGROUP_PERF=y
CONFIG_BLK_CGROUP=y
CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
CONFIG_BPF_SYSCALL=y
+CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
@@ -61,7 +67,6 @@ CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_CHSC_SCH=y
CONFIG_CRASH_DUMP=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
CONFIG_NET=y
@@ -103,7 +108,6 @@ CONFIG_TCP_CONG_LP=m
CONFIG_TCP_CONG_VENO=m
CONFIG_TCP_CONG_YEAH=m
CONFIG_TCP_CONG_ILLINOIS=m
-CONFIG_IPV6=y
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
@@ -453,19 +457,9 @@ CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_VIRTIO_BALLOON=m
-# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
-CONFIG_JBD_DEBUG=y
CONFIG_JBD2_DEBUG=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
@@ -485,7 +479,7 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
CONFIG_AUTOFS4_FS=m
-CONFIG_FUSE_FS=m
+CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_FSCACHE=m
CONFIG_CACHEFILES=m
@@ -546,6 +540,7 @@ CONFIG_TIMER_STATS=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
+CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_STACK_TRACER=y
@@ -554,6 +549,7 @@ CONFIG_UPROBE_EVENT=y
CONFIG_LKDTM=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_TEST_BPF=m
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 92805d604173..1719843a55a2 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -23,8 +23,6 @@ CONFIG_CRASH_DUMP=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
# CONFIG_IUCV is not set
-CONFIG_ATM=y
-CONFIG_ATM_LANE=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
# CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -54,14 +52,10 @@ CONFIG_RAW_DRIVER=y
# CONFIG_S390_VMUR is not set
# CONFIG_HID is not set
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT4_FS=y
-CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_EXT4_FS_SECURITY=y
+# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
CONFIG_CONFIGFS_FS=y
+# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_FS=y
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 0b9b95f3c703..48e1a2d3e318 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -27,6 +27,7 @@
#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/spinlock.h>
+#include <crypto/xts.h>
#include "crypt_s390.h"
#define AES_KEYLEN_128 1
@@ -587,6 +588,11 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
{
struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
+ int err;
+
+ err = xts_check_key(tfm, in_key, key_len);
+ if (err)
+ return err;
switch (key_len) {
case 32:
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index b8045b97f4fb..d750cc0dfe30 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -669,11 +669,13 @@ static const struct file_operations prng_tdes_fops = {
static struct miscdevice prng_sha512_dev = {
.name = "prandom",
.minor = MISC_DYNAMIC_MINOR,
+ .mode = 0644,
.fops = &prng_sha512_fops,
};
static struct miscdevice prng_tdes_dev = {
.name = "prandom",
.minor = MISC_DYNAMIC_MINOR,
+ .mode = 0644,
.fops = &prng_tdes_fops,
};
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 9256b48e7e43..e24f2af4c73b 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -11,22 +11,31 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
+CONFIG_MEMCG_KMEM=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_PERF=y
CONFIG_CGROUP_SCHED=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_BLK_CGROUP=y
CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
CONFIG_BPF_SYSCALL=y
+CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
+CONFIG_STATIC_KEYS_SELFTEST=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
@@ -37,6 +46,7 @@ CONFIG_DEFAULT_DEADLINE=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z196=y
CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
@@ -52,7 +62,6 @@ CONFIG_NET_KEY=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
# CONFIG_INET_LRO is not set
-CONFIG_IPV6=y
CONFIG_L2TP=m
CONFIG_L2TP_DEBUGFS=m
CONFIG_VLAN_8021Q=y
@@ -89,10 +98,26 @@ CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
-CONFIG_SCSI_SCAN_ASYNC=y
CONFIG_SCSI_FC_ATTRS=y
CONFIG_ZFCP=y
CONFIG_SCSI_VIRTIO=y
+CONFIG_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
@@ -137,7 +162,6 @@ CONFIG_DEBUG_PI_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_RCU_CPU_STALL_TIMEOUT=60
-# CONFIG_RCU_CPU_STALL_INFO is not set
CONFIG_RCU_TRACE=y
CONFIG_LATENCYTOP=y
CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index b2e5902bd8f4..255c7eec4481 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -67,7 +67,7 @@ static void hypfs_remove(struct dentry *dentry)
struct dentry *parent;
parent = dentry->d_parent;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
if (simple_positive(dentry)) {
if (d_is_dir(dentry))
simple_rmdir(d_inode(parent), dentry);
@@ -76,7 +76,7 @@ static void hypfs_remove(struct dentry *dentry)
}
d_delete(dentry);
dput(dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
}
static void hypfs_delete_tree(struct dentry *root)
@@ -278,8 +278,8 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
sbi->uid = current_uid();
sbi->gid = current_gid();
sb->s_fs_info = sbi;
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = HYPFS_MAGIC;
sb->s_op = &hypfs_s_ops;
if (hypfs_parse_options(data, sb))
@@ -331,7 +331,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
struct dentry *dentry;
struct inode *inode;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
dentry = lookup_one_len(name, parent, strlen(name));
if (IS_ERR(dentry)) {
dentry = ERR_PTR(-ENOMEM);
@@ -359,7 +359,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
d_instantiate(dentry, inode);
dget(dentry);
fail:
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return dentry;
}
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index d68e11e0df5e..5c8db3ce61c8 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -26,26 +26,18 @@
#define wmb() barrier()
#define dma_rmb() mb()
#define dma_wmb() mb()
-#define smp_mb() mb()
-#define smp_rmb() rmb()
-#define smp_wmb() wmb()
+#define __smp_mb() mb()
+#define __smp_rmb() rmb()
+#define __smp_wmb() wmb()
-#define read_barrier_depends() do { } while (0)
-#define smp_read_barrier_depends() do { } while (0)
-
-#define smp_mb__before_atomic() smp_mb()
-#define smp_mb__after_atomic() smp_mb()
-
-#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
-
-#define smp_store_release(p, v) \
+#define __smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
barrier(); \
WRITE_ONCE(*p, v); \
} while (0)
-#define smp_load_acquire(p) \
+#define __smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = READ_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
@@ -53,4 +45,9 @@ do { \
___p1; \
})
+#define __smp_mb__before_atomic() barrier()
+#define __smp_mb__after_atomic() barrier()
+
+#include <asm-generic/barrier.h>
+
#endif /* __ASM_BARRIER_H */
diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h
index 4d7ccac5fd1d..22da3b34c655 100644
--- a/arch/s390/include/asm/cache.h
+++ b/arch/s390/include/asm/cache.h
@@ -15,4 +15,7 @@
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+/* Read-only memory is marked before mark_rodata_ro() is called. */
+#define __ro_after_init __read_mostly
+
#endif
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 740364856355..d7f100c53f07 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -91,8 +91,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
* returns a 32-bit checksum
*/
static inline __wsum
-csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
- unsigned short len, unsigned short proto,
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto,
__wsum sum)
{
__u32 csum = (__force __u32)sum;
@@ -118,8 +117,7 @@ csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
*/
static inline __sum16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr,
- unsigned short len, unsigned short proto,
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto,
__wsum sum)
{
return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 0c5d8ee657f0..d1e7b0a0feeb 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -312,6 +312,7 @@ extern void css_schedule_reprobe(void);
extern void reipl_ccw_dev(struct ccw_dev_id *id);
struct cio_iplinfo {
+ u8 ssid;
u16 devno;
int is_qdio;
};
diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h
index a0e71a501f7c..5687d62fb0cb 100644
--- a/arch/s390/include/asm/clp.h
+++ b/arch/s390/include/asm/clp.h
@@ -4,14 +4,23 @@
/* CLP common request & response block size */
#define CLP_BLK_SIZE PAGE_SIZE
+#define CLP_LPS_BASE 0
+#define CLP_LPS_PCI 2
+
struct clp_req_hdr {
u16 len;
u16 cmd;
+ u32 fmt : 4;
+ u32 reserved1 : 28;
+ u64 reserved2;
} __packed;
struct clp_rsp_hdr {
u16 len;
u16 rsp;
+ u32 fmt : 4;
+ u32 reserved1 : 28;
+ u64 reserved2;
} __packed;
/* CLP Response Codes */
@@ -25,4 +34,22 @@ struct clp_rsp_hdr {
#define CLP_RC_NODATA 0x0080 /* No data available */
#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */
+/* Store logical-processor characteristics request */
+struct clp_req_slpc {
+ struct clp_req_hdr hdr;
+} __packed;
+
+struct clp_rsp_slpc {
+ struct clp_rsp_hdr hdr;
+ u32 reserved2[4];
+ u32 lpif[8];
+ u32 reserved3[8];
+ u32 lpic[8];
+} __packed;
+
+struct clp_req_rsp_slpc {
+ struct clp_req_slpc request;
+ struct clp_rsp_slpc response;
+} __packed;
+
#endif
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index d350ed9d0fbb..352f7bdaf11f 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -284,7 +284,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
static inline int is_compat_task(void)
{
- return is_32bit_task();
+ return test_thread_flag(TIF_31BIT);
}
static inline void __user *arch_compat_alloc_user_space(long len)
diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h
index 7c31d3e25cd1..bcb9cd2a730a 100644
--- a/arch/s390/include/asm/crw.h
+++ b/arch/s390/include/asm/crw.h
@@ -52,18 +52,4 @@ void crw_wait_for_channel_report(void);
#define CRW_ERC_PERRI 0x07 /* perm. error, facility init */
#define CRW_ERC_PMOD 0x08 /* installed parameters modified */
-static inline int stcrw(struct crw *pcrw)
-{
- int ccode;
-
- asm volatile(
- " stcrw 0(%2)\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (ccode), "=m" (*pcrw)
- : "a" (pcrw)
- : "cc" );
- return ccode;
-}
-
#endif /* _ASM_S390_CRW_H */
diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h
index d8f9872b0e2d..4a9f35e0973f 100644
--- a/arch/s390/include/asm/device.h
+++ b/arch/s390/include/asm/device.h
@@ -3,5 +3,9 @@
*
* This file is released under the GPLv2
*/
-#include <asm-generic/device.h>
+struct dev_archdata {
+ struct dma_map_ops *dma_ops;
+};
+struct pdev_archdata {
+};
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
index b3fd54d93dd2..3249b7464889 100644
--- a/arch/s390/include/asm/dma-mapping.h
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -11,11 +11,13 @@
#define DMA_ERROR_CODE (~(dma_addr_t) 0x0)
-extern struct dma_map_ops s390_dma_ops;
+extern struct dma_map_ops s390_pci_dma_ops;
static inline struct dma_map_ops *get_dma_ops(struct device *dev)
{
- return &s390_dma_ops;
+ if (dev && dev->archdata.dma_ops)
+ return dev->archdata.dma_ops;
+ return &dma_noop_ops;
}
static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
@@ -23,8 +25,6 @@ static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
{
}
-#include <asm-generic/dma-mapping-common.h>
-
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
if (!dev->dma_mask)
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 3ad48f22de78..563ab9f44874 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -104,6 +104,9 @@
#define HWCAP_S390_TE 1024
#define HWCAP_S390_VXRS 2048
+/* Internal bits, not exposed via elf */
+#define HWCAP_INT_SIE 1UL
+
/*
* These are used to set parameters in the core dumps.
*/
@@ -126,6 +129,7 @@ typedef s390_regs elf_gregset_t;
typedef s390_fp_regs compat_elf_fpregset_t;
typedef s390_compat_regs compat_elf_gregset_t;
+#include <linux/compat.h>
#include <linux/sched.h> /* for task_struct */
#include <asm/mmu_context.h>
@@ -159,7 +163,7 @@ extern unsigned int vdso_enabled;
the loader. We need to make sure that it is out of the way of the program
that it will "exec", and that there is sufficient room for the brk. 64-bit
tasks are aligned to 4GB. */
-#define ELF_ET_DYN_BASE (is_32bit_task() ? \
+#define ELF_ET_DYN_BASE (is_compat_task() ? \
(STACK_TOP / 3 * 2) : \
(STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))
@@ -169,6 +173,10 @@ extern unsigned int vdso_enabled;
extern unsigned long elf_hwcap;
#define ELF_HWCAP (elf_hwcap)
+/* Internal hardware capabilities, not exposed via elf */
+
+extern unsigned long int_hwcap;
+
/* This yields a string that ld.so will use to load implementation
specific libraries for optimization. This is more specific in
intent than poking at uname or /proc/cpuinfo.
@@ -206,9 +214,16 @@ do { \
} while (0)
#endif /* CONFIG_COMPAT */
-extern unsigned long mmap_rnd_mask;
-
-#define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask)
+/*
+ * Cache aliasing on the latest machines calls for a mapping granularity
+ * of 512KB. For 64-bit processes use a 512KB alignment and a randomization
+ * of up to 1GB. For 31-bit processes the virtual address space is limited,
+ * use no alignment and limit the randomization to 8MB.
+ */
+#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ffffUL)
+#define MMAP_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ff80UL)
+#define MMAP_ALIGN_MASK (is_compat_task() ? 0 : 0x7fUL)
+#define STACK_RND_MASK MMAP_RND_MASK
#define ARCH_DLINFO \
do { \
@@ -222,6 +237,4 @@ struct linux_binprm;
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
int arch_setup_additional_pages(struct linux_binprm *, int);
-void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs);
-
#endif
diff --git a/arch/s390/include/asm/facilities_src.h b/arch/s390/include/asm/facilities_src.h
new file mode 100644
index 000000000000..4917728e5828
--- /dev/null
+++ b/arch/s390/include/asm/facilities_src.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef S390_GEN_FACILITIES_C
+#error "This file can only be included by gen_facilities.c"
+#endif
+
+#include <linux/kconfig.h>
+
+struct facility_def {
+ char *name;
+ int *bits;
+};
+
+static struct facility_def facility_defs[] = {
+ {
+ /*
+ * FACILITIES_ALS contains the list of facilities that are
+ * required to run a kernel that is compiled e.g. with
+ * -march=<machine>.
+ */
+ .name = "FACILITIES_ALS",
+ .bits = (int[]){
+#ifdef CONFIG_HAVE_MARCH_Z900_FEATURES
+ 0, /* N3 instructions */
+ 1, /* z/Arch mode installed */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z990_FEATURES
+ 18, /* long displacement facility */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+ 7, /* stfle */
+ 17, /* message security assist */
+ 21, /* extended-immediate facility */
+ 25, /* store clock fast */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ 27, /* mvcos */
+ 32, /* compare and swap and store */
+ 33, /* compare and swap and store 2 */
+ 34, /* general extension facility */
+ 35, /* execute extensions */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ 45, /* fast-BCR, etc. */
+#endif
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ 49, /* misc-instruction-extensions */
+ 52, /* interlocked facility 2 */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
+ 53, /* load-and-zero-rightmost-byte, etc. */
+#endif
+ -1 /* END */
+ }
+ },
+};
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 0aa6a7ed95a3..09b406db7529 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -7,6 +7,10 @@
#ifndef __ASM_FACILITY_H
#define __ASM_FACILITY_H
+#include <generated/facilities.h>
+
+#ifndef __ASSEMBLY__
+
#include <linux/string.h>
#include <linux/preempt.h>
#include <asm/lowcore.h>
@@ -30,6 +34,12 @@ static inline int __test_facility(unsigned long nr, void *facilities)
*/
static inline int test_facility(unsigned long nr)
{
+ unsigned long facilities_als[] = { FACILITIES_ALS };
+
+ if (__builtin_constant_p(nr) && nr < sizeof(facilities_als) * 8) {
+ if (__test_facility(nr, &facilities_als))
+ return 1;
+ }
return __test_facility(nr, &S390_lowcore.stfle_fac_list);
}
@@ -44,10 +54,8 @@ static inline void stfle(u64 *stfle_fac_list, int size)
preempt_disable();
asm volatile(
- " .insn s,0xb2b10000,0(0)\n" /* stfl */
- "0:\n"
- EX_TABLE(0b, 0b)
- : "+m" (S390_lowcore.stfl_fac_list));
+ " stfl 0(0)\n"
+ : "=m" (S390_lowcore.stfl_fac_list));
nr = 4; /* bytes stored by stfl */
memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4);
if (S390_lowcore.stfl_fac_list & 0x01000000) {
@@ -64,4 +72,5 @@ static inline void stfle(u64 *stfle_fac_list, int size)
preempt_enable();
}
+#endif /* __ASSEMBLY__ */
#endif /* __ASM_FACILITY_H */
diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
index 2559b16da525..629c90865a07 100644
--- a/arch/s390/include/asm/fpu/internal.h
+++ b/arch/s390/include/asm/fpu/internal.h
@@ -12,21 +12,13 @@
#include <asm/ctl_reg.h>
#include <asm/fpu/types.h>
-static inline void save_vx_regs_safe(__vector128 *vxrs)
+static inline void save_vx_regs(__vector128 *vxrs)
{
- unsigned long cr0, flags;
-
- flags = arch_local_irq_save();
- __ctl_store(cr0, 0, 0);
- __ctl_set_bit(0, 17);
- __ctl_set_bit(0, 18);
asm volatile(
" la 1,%0\n"
" .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */
" .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */
: "=Q" (*(struct vx_array *) vxrs) : : "1");
- __ctl_load(cr0, 0, 0);
- arch_local_irq_restore(flags);
}
static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
@@ -48,6 +40,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
{
fpregs->pad = 0;
+ fpregs->fpc = fpu->fpc;
if (MACHINE_HAS_VX)
convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
else
@@ -57,6 +50,7 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
{
+ fpu->fpc = fpregs->fpc;
if (MACHINE_HAS_VX)
convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
else
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
new file mode 100644
index 000000000000..d054c1b07a3c
--- /dev/null
+++ b/arch/s390/include/asm/gmap.h
@@ -0,0 +1,64 @@
+/*
+ * KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_GMAP_H
+#define _ASM_S390_GMAP_H
+
+/**
+ * struct gmap_struct - guest address space
+ * @crst_list: list of all crst tables used in the guest address space
+ * @mm: pointer to the parent mm_struct
+ * @guest_to_host: radix tree with guest to host address translation
+ * @host_to_guest: radix tree with pointer to segment table entries
+ * @guest_table_lock: spinlock to protect all entries in the guest page table
+ * @table: pointer to the page directory
+ * @asce: address space control element for gmap page table
+ * @pfault_enabled: defines if pfaults are applicable for the guest
+ */
+struct gmap {
+ struct list_head list;
+ struct list_head crst_list;
+ struct mm_struct *mm;
+ struct radix_tree_root guest_to_host;
+ struct radix_tree_root host_to_guest;
+ spinlock_t guest_table_lock;
+ unsigned long *table;
+ unsigned long asce;
+ unsigned long asce_end;
+ void *private;
+ bool pfault_enabled;
+};
+
+/**
+ * struct gmap_notifier - notify function block for page invalidation
+ * @notifier_call: address of callback function
+ */
+struct gmap_notifier {
+ struct list_head list;
+ void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
+};
+
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
+void gmap_free(struct gmap *gmap);
+void gmap_enable(struct gmap *gmap);
+void gmap_disable(struct gmap *gmap);
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+ unsigned long to, unsigned long len);
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
+unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
+unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
+int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
+void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
+void __gmap_zap(struct gmap *, unsigned long gaddr);
+void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
+
+void gmap_register_ipte_notifier(struct gmap_notifier *);
+void gmap_unregister_ipte_notifier(struct gmap_notifier *);
+int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
+
+#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 39ae6a359747..6fc44dca193e 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -64,7 +64,8 @@ struct ipl_block_fcp {
struct ipl_block_ccw {
u8 reserved1[84];
- u8 reserved2[2];
+ u16 reserved2 : 13;
+ u8 ssid : 3;
u16 devno;
u8 vm_flags;
u8 reserved3[3];
@@ -86,14 +87,12 @@ struct ipl_parameter_block {
* IPL validity flags
*/
extern u32 ipl_flags;
-extern u32 dump_prefix_page;
-struct dump_save_areas {
- struct save_area_ext **areas;
- int count;
-};
-
-extern struct dump_save_areas dump_save_areas;
+struct save_area;
+struct save_area * __init save_area_alloc(bool is_boot_cpu);
+struct save_area * __init save_area_boot_cpu(void);
+void __init save_area_add_regs(struct save_area *, void *regs);
+void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs);
extern void do_reipl(void);
extern void do_halt(void);
@@ -175,7 +174,7 @@ enum diag308_rc {
extern int diag308(unsigned long subcode, void *addr);
extern void diag308_reset(void);
-extern void store_status(void);
+extern void store_status(void (*fn)(void *), void *data);
extern void lgr_info_log(void);
#endif /* _ASM_S390_IPL_H */
diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
index 16aa0c779e07..595a275c36f8 100644
--- a/arch/s390/include/asm/irqflags.h
+++ b/arch/s390/include/asm/irqflags.h
@@ -8,6 +8,8 @@
#include <linux/types.h>
+#define ARCH_IRQ_ENABLED (3UL << (BITS_PER_LONG - 8))
+
/* store then OR system mask. */
#define __arch_local_irq_stosm(__or) \
({ \
@@ -54,14 +56,17 @@ static inline notrace void arch_local_irq_enable(void)
__arch_local_irq_stosm(0x03);
}
+/* This only restores external and I/O interrupt state */
static inline notrace void arch_local_irq_restore(unsigned long flags)
{
- __arch_local_irq_ssm(flags);
+ /* only disabled->disabled and disabled->enabled is valid */
+ if (flags & ARCH_IRQ_ENABLED)
+ arch_local_irq_enable();
}
static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
{
- return !(flags & (3UL << (BITS_PER_LONG - 8)));
+ return !(flags & ARCH_IRQ_ENABLED);
}
static inline notrace bool arch_irqs_disabled(void)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index efaac2c3bb77..6da41fab70fb 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -20,12 +20,15 @@
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
+#include <linux/seqlock.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu/api.h>
#include <asm/isc.h>
-#define KVM_MAX_VCPUS 64
+#define KVM_S390_BSCA_CPU_SLOTS 64
+#define KVM_S390_ESCA_CPU_SLOTS 248
+#define KVM_MAX_VCPUS KVM_S390_ESCA_CPU_SLOTS
#define KVM_USER_MEM_SLOTS 32
/*
@@ -37,12 +40,41 @@
#define KVM_IRQCHIP_NUM_PINS 4096
#define KVM_HALT_POLL_NS_DEFAULT 0
+/* s390-specific vcpu->requests bit members */
+#define KVM_REQ_ENABLE_IBS 8
+#define KVM_REQ_DISABLE_IBS 9
+
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f
-struct sca_entry {
+union bsca_sigp_ctrl {
+ __u8 value;
+ struct {
+ __u8 c : 1;
+ __u8 r : 1;
+ __u8 scn : 6;
+ };
+} __packed;
+
+union esca_sigp_ctrl {
+ __u16 value;
+ struct {
+ __u8 c : 1;
+ __u8 reserved: 7;
+ __u8 scn;
+ };
+} __packed;
+
+struct esca_entry {
+ union esca_sigp_ctrl sigp_ctrl;
+ __u16 reserved1[3];
+ __u64 sda;
+ __u64 reserved2[6];
+} __packed;
+
+struct bsca_entry {
__u8 reserved0;
- __u8 sigp_ctrl;
+ union bsca_sigp_ctrl sigp_ctrl;
__u16 reserved[3];
__u64 sda;
__u64 reserved2[2];
@@ -57,14 +89,22 @@ union ipte_control {
};
};
-struct sca_block {
+struct bsca_block {
union ipte_control ipte_control;
__u64 reserved[5];
__u64 mcn;
__u64 reserved2;
- struct sca_entry cpu[64];
+ struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
} __attribute__((packed));
+struct esca_block {
+ union ipte_control ipte_control;
+ __u64 reserved1[7];
+ __u64 mcn[4];
+ __u64 reserved2[20];
+ struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
+} __packed;
+
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
#define CPUSTAT_ECALL_PEND 0x08000000
@@ -182,24 +222,19 @@ struct kvm_s390_sie_block {
__u64 pp; /* 0x01de */
__u8 reserved1e6[2]; /* 0x01e6 */
__u64 itdba; /* 0x01e8 */
- __u8 reserved1f0[16]; /* 0x01f0 */
+ __u64 riccbd; /* 0x01f0 */
+ __u8 reserved1f8[8]; /* 0x01f8 */
} __attribute__((packed));
struct kvm_s390_itdb {
__u8 data[256];
} __packed;
-struct kvm_s390_vregs {
- __vector128 vrs[32];
- __u8 reserved200[512]; /* for future vector expansion */
-} __packed;
-
struct sie_page {
struct kvm_s390_sie_block sie_block;
__u8 reserved200[1024]; /* 0x0200 */
struct kvm_s390_itdb itdb; /* 0x0600 */
- __u8 reserved700[1280]; /* 0x0700 */
- struct kvm_s390_vregs vregs; /* 0x0c00 */
+ __u8 reserved700[2304]; /* 0x0700 */
} __packed;
struct kvm_vcpu_stat {
@@ -427,7 +462,7 @@ struct kvm_s390_irq_payload {
struct kvm_s390_local_interrupt {
spinlock_t lock;
struct kvm_s390_float_interrupt *float_int;
- wait_queue_head_t *wq;
+ struct swait_queue_head *wq;
atomic_t *cpuflags;
DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
struct kvm_s390_irq_payload irq;
@@ -506,7 +541,6 @@ struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
unsigned int host_acrs[NUM_ACRS];
struct fpu host_fpregs;
- struct fpu guest_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
@@ -519,6 +553,15 @@ struct kvm_vcpu_arch {
unsigned long pfault_token;
unsigned long pfault_select;
unsigned long pfault_compare;
+ bool cputm_enabled;
+ /*
+ * The seqcount protects updates to cputm_start and sie_block.cputm,
+ * this way we can have non-blocking reads with consistent values.
+ * Only the owning VCPU thread (vcpu->cpu) is allowed to change these
+ * values and to start/stop/enable/disable cpu timer accounting.
+ */
+ seqcount_t cputm_seqcount;
+ __u64 cputm_start;
};
struct kvm_vm_stat {
@@ -557,15 +600,11 @@ struct s390_io_adapter {
#define S390_ARCH_FAC_MASK_SIZE_U64 \
(S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
-struct kvm_s390_fac {
- /* facility list requested by guest */
- __u64 list[S390_ARCH_FAC_LIST_SIZE_U64];
- /* facility mask supported by kvm & hosting machine */
- __u64 mask[S390_ARCH_FAC_LIST_SIZE_U64];
-};
-
struct kvm_s390_cpu_model {
- struct kvm_s390_fac *fac;
+ /* facility mask supported by kvm & hosting machine */
+ __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
+ /* facility list requested by guest (in dma page) */
+ __u64 *fac_list;
struct cpuid cpu_id;
unsigned short ibc;
};
@@ -584,12 +623,25 @@ struct kvm_s390_crypto_cb {
__u8 reserved80[128]; /* 0x0080 */
};
+/*
+ * sie_page2 has to be allocated as DMA because fac_list and crycb need
+ * 31bit addresses in the sie control block.
+ */
+struct sie_page2 {
+ __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
+ struct kvm_s390_crypto_cb crycb; /* 0x0800 */
+ u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
+} __packed;
+
struct kvm_arch{
- struct sca_block *sca;
+ void *sca;
+ int use_esca;
+ rwlock_t sca_lock;
debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int;
struct kvm_device *flic;
struct gmap *gmap;
+ unsigned long mem_limit;
int css_support;
int use_irqchip;
int use_cmma;
@@ -601,6 +653,7 @@ struct kvm_arch{
int ipte_lock_count;
struct mutex ipte_mutex;
spinlock_t start_stop_lock;
+ struct sie_page2 *sie_page2;
struct kvm_s390_cpu_model model;
struct kvm_s390_crypto crypto;
u64 epoch;
diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h
index 7aa799134a11..d5427c78b1b3 100644
--- a/arch/s390/include/asm/livepatch.h
+++ b/arch/s390/include/asm/livepatch.h
@@ -19,7 +19,6 @@
#include <linux/module.h>
-#ifdef CONFIG_LIVEPATCH
static inline int klp_check_compiler_support(void)
{
return 0;
@@ -36,8 +35,5 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
{
regs->psw.addr = ip;
}
-#else
-#error Live patching support is disabled; check CONFIG_LIVEPATCH
-#endif
#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index afe1cfebf1a4..d79ba7cf75b0 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -16,28 +16,7 @@
#define LC_ORDER 1
#define LC_PAGES 2
-struct save_area {
- u64 fp_regs[16];
- u64 gp_regs[16];
- u8 psw[16];
- u8 pad1[8];
- u32 pref_reg;
- u32 fp_ctrl_reg;
- u8 pad2[4];
- u32 tod_reg;
- u64 timer;
- u64 clk_cmp;
- u8 pad3[8];
- u32 acc_regs[16];
- u64 ctrl_regs[16];
-} __packed;
-
-struct save_area_ext {
- struct save_area sa;
- __vector128 vx_regs[32];
-};
-
-struct _lowcore {
+struct lowcore {
__u8 pad_0x0000[0x0014-0x0000]; /* 0x0000 */
__u32 ipl_parmblock_ptr; /* 0x0014 */
__u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */
@@ -204,9 +183,9 @@ struct _lowcore {
__u8 vector_save_area[1024]; /* 0x1c00 */
} __packed;
-#define S390_lowcore (*((struct _lowcore *) 0))
+#define S390_lowcore (*((struct lowcore *) 0))
-extern struct _lowcore *lowcore_ptr[];
+extern struct lowcore *lowcore_ptr[];
static inline void set_prefix(__u32 address)
{
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index fb1b93ea3e3f..d321469eeda7 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -15,17 +15,25 @@
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ spin_lock_init(&mm->context.list_lock);
+ INIT_LIST_HEAD(&mm->context.pgtable_list);
+ INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.attach_count, 0);
mm->context.flush_mm = 0;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
- mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste;
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
#endif
- mm->context.asce_limit = STACK_TOP_MAX;
+ if (mm->context.asce_limit == 0) {
+ /* context created by exec, set asce limit to 4TB */
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+ mm->context.asce_limit = STACK_TOP_MAX;
+ } else if (mm->context.asce_limit == (1UL << 31)) {
+ mm_inc_nr_pmds(mm);
+ }
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
}
@@ -111,8 +119,6 @@ static inline void activate_mm(struct mm_struct *prev,
static inline void arch_dup_mmap(struct mm_struct *oldmm,
struct mm_struct *mm)
{
- if (oldmm->context.asce_limit < mm->context.asce_limit)
- crst_table_downgrade(mm, oldmm->context.asce_limit);
}
static inline void arch_exit_mmap(struct mm_struct *mm)
@@ -130,4 +136,16 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
{
}
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+ bool write, bool execute, bool foreign)
+{
+ /* by default, allow everything */
+ return true;
+}
+
+static inline bool arch_pte_access_permitted(pte_t pte, bool write)
+{
+ /* by default, allow everything */
+ return true;
+}
#endif /* __S390_MMU_CONTEXT_H */
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
index 295f2c4f1c96..943475382d51 100644
--- a/arch/s390/include/asm/os_info.h
+++ b/arch/s390/include/asm/os_info.h
@@ -38,7 +38,7 @@ u32 os_info_csum(struct os_info *os_info);
#ifdef CONFIG_CRASH_DUMP
void *os_info_old_entry(int nr, unsigned long *size);
-int copy_from_oldmem(void *dest, void *src, size_t count);
+int copy_oldmem_kernel(void *dst, void *src, size_t count);
#else
static inline void *os_info_old_entry(int nr, unsigned long *size)
{
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c873e682b67f..b6bfa169a002 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -9,7 +9,6 @@
#include <linux/pci.h>
#include <linux/mutex.h>
#include <asm-generic/pci.h>
-#include <asm-generic/pci-dma-compat.h>
#include <asm/pci_clp.h>
#include <asm/pci_debug.h>
@@ -45,7 +44,7 @@ struct zpci_fmb {
u64 rpcit_ops;
u64 dma_rbytes;
u64 dma_wbytes;
-} __packed __aligned(16);
+} __packed __aligned(64);
enum zpci_state {
ZPCI_FN_STATE_RESERVED,
@@ -66,7 +65,6 @@ struct s390_domain;
/* Private data per function */
struct zpci_dev {
- struct pci_dev *pdev;
struct pci_bus *bus;
struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
@@ -192,7 +190,7 @@ int zpci_fmb_disable_device(struct zpci_dev *);
/* Debug */
int zpci_debug_init(void);
void zpci_debug_exit(void);
-void zpci_debug_init_device(struct zpci_dev *);
+void zpci_debug_init_device(struct zpci_dev *, const char *);
void zpci_debug_exit_device(struct zpci_dev *);
void zpci_debug_info(struct zpci_dev *, struct seq_file *);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index dd78f92f1cce..e75c64cbcf08 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -49,9 +49,6 @@ struct clp_fh_list_entry {
/* List PCI functions request */
struct clp_req_list_pci {
struct clp_req_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u64 resume_token;
u64 reserved2;
} __packed;
@@ -59,9 +56,6 @@ struct clp_req_list_pci {
/* List PCI functions response */
struct clp_rsp_list_pci {
struct clp_rsp_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u64 resume_token;
u32 reserved2;
u16 max_fn;
@@ -73,9 +67,6 @@ struct clp_rsp_list_pci {
/* Query PCI function request */
struct clp_req_query_pci {
struct clp_req_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u32 fh; /* function handle */
u32 reserved2;
u64 reserved3;
@@ -84,9 +75,6 @@ struct clp_req_query_pci {
/* Query PCI function response */
struct clp_rsp_query_pci {
struct clp_rsp_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 : 64;
u16 vfn; /* virtual fn number */
u16 : 7;
u16 util_str_avail : 1; /* utility string available? */
@@ -108,21 +96,15 @@ struct clp_rsp_query_pci {
/* Query PCI function group request */
struct clp_req_query_pci_grp {
struct clp_req_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
- u32 : 24;
+ u32 reserved2 : 24;
u32 pfgid : 8; /* function group id */
- u32 reserved2;
- u64 reserved3;
+ u32 reserved3;
+ u64 reserved4;
} __packed;
/* Query PCI function group response */
struct clp_rsp_query_pci_grp {
struct clp_rsp_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u16 : 4;
u16 noi : 12; /* number of interrupts */
u8 version;
@@ -141,9 +123,6 @@ struct clp_rsp_query_pci_grp {
/* Set PCI function request */
struct clp_req_set_pci {
struct clp_req_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u32 fh; /* function handle */
u16 reserved2;
u8 oc; /* operation controls */
@@ -154,9 +133,6 @@ struct clp_req_set_pci {
/* Set PCI function response */
struct clp_rsp_set_pci {
struct clp_rsp_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u32 fh; /* function handle */
u32 reserved3;
u64 reserved4;
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 7a7abf1a5537..92df3eb8d14e 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -23,6 +23,8 @@ enum zpci_ioat_dtype {
#define ZPCI_IOTA_FS_2G 2
#define ZPCI_KEY (PAGE_DEFAULT_KEY << 5)
+#define ZPCI_TABLE_SIZE_RT (1UL << 42)
+
#define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
#define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS)
@@ -195,5 +197,7 @@ void zpci_dma_exit_device(struct zpci_dev *);
void dma_free_seg_table(unsigned long);
unsigned long *dma_alloc_cpu_table(void);
void dma_cleanup_tables(unsigned long *);
-void dma_update_cpu_trans(unsigned long *, void *, dma_addr_t, int);
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr);
+void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags);
+
#endif
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 1a9a98de5bde..69aa18be61af 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -8,10 +8,13 @@
#include <asm/pci_insn.h>
/* I/O Map */
-#define ZPCI_IOMAP_MAX_ENTRIES 0x7fff
-#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000ULL
-#define ZPCI_IOMAP_ADDR_IDX_MASK 0x7fff000000000000ULL
-#define ZPCI_IOMAP_ADDR_OFF_MASK 0x0000ffffffffffffULL
+#define ZPCI_IOMAP_SHIFT 48
+#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_OFF_MASK ((1UL << ZPCI_IOMAP_SHIFT) - 1)
+#define ZPCI_IOMAP_MAX_ENTRIES \
+ ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+#define ZPCI_IOMAP_ADDR_IDX_MASK \
+ (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
struct zpci_iomap_entry {
u32 fh;
@@ -21,8 +24,9 @@ struct zpci_iomap_entry {
extern struct zpci_iomap_entry *zpci_iomap_start;
+#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << ZPCI_IOMAP_SHIFT))
#define ZPCI_IDX(addr) \
- (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48)
+ (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT)
#define ZPCI_OFFSET(addr) \
((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK)
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 6d6556ca24aa..90240dfef76a 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -178,7 +178,6 @@
ret__; \
})
-#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double
#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
#include <asm-generic/percpu.h>
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index f897ec73dc8c..1f7ff85c5e4c 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -21,7 +21,7 @@
#define PMU_F_ERR_LSDA 0x0200
#define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
-/* Perf defintions for PMU event attributes in sysfs */
+/* Perf definitions for PMU event attributes in sysfs */
extern __init const struct attribute_group **cpumf_cf_event_group(void);
extern ssize_t cpumf_events_sysfs_show(struct device *dev,
struct device_attribute *attr,
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 7b7858f158b4..9b3d9b6099f2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -23,10 +23,6 @@ void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
extern int page_table_allocate_pgste;
-int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
- unsigned long key, bool nq);
-unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
-
static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
{
typedef struct { char _[n]; } addrtype;
@@ -100,12 +96,26 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- spin_lock_init(&mm->context.list_lock);
- INIT_LIST_HEAD(&mm->context.pgtable_list);
- INIT_LIST_HEAD(&mm->context.gmap_list);
- return (pgd_t *) crst_table_alloc(mm);
+ unsigned long *table = crst_table_alloc(mm);
+
+ if (!table)
+ return NULL;
+ if (mm->context.asce_limit == (1UL << 31)) {
+ /* Forking a compat process with 2 page table levels */
+ if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+ crst_table_free(mm, table);
+ return NULL;
+ }
+ }
+ return (pgd_t *) table;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ if (mm->context.asce_limit == (1UL << 31))
+ pgtable_pmd_page_dtor(virt_to_page(pgd));
+ crst_table_free(mm, (unsigned long *) pgd);
}
-#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
static inline void pmd_populate(struct mm_struct *mm,
pmd_t *pmd, pgtable_t pte)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 024f85f947ae..2f66645587a2 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -286,7 +286,6 @@ static inline int is_module_addr(void *addr)
#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */
#define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */
-#define _SEGMENT_ENTRY_SPLIT 0x0800 /* THP splitting bit */
#define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */
#define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */
#define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */
@@ -299,15 +298,15 @@ static inline int is_module_addr(void *addr)
/*
* Segment table entry encoding (R = read-only, I = invalid, y = young bit):
- * dy..R...I...wr
+ * dy..R...I...rw
* prot-none, clean, old 00..1...1...00
* prot-none, clean, young 01..1...1...00
* prot-none, dirty, old 10..1...1...00
* prot-none, dirty, young 11..1...1...00
- * read-only, clean, old 00..1...1...01
- * read-only, clean, young 01..1...0...01
- * read-only, dirty, old 10..1...1...01
- * read-only, dirty, young 11..1...0...01
+ * read-only, clean, old 00..1...1...10
+ * read-only, clean, young 01..1...0...10
+ * read-only, dirty, old 10..1...1...10
+ * read-only, dirty, young 11..1...0...10
* read-write, clean, old 00..1...1...11
* read-write, clean, young 01..1...0...11
* read-write, dirty, old 10..0...1...11
@@ -318,8 +317,6 @@ static inline int is_module_addr(void *addr)
* SW-bits: y young, d dirty, r read, w write
*/
-#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */
-
/* Page status table bits for virtualization */
#define PGSTE_ACC_BITS 0xf000000000000000UL
#define PGSTE_FP_BIT 0x0800000000000000UL
@@ -523,19 +520,6 @@ static inline int pmd_bad(pmd_t pmd)
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
}
-#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
-extern void pmdp_splitting_flush(struct vm_area_struct *vma,
- unsigned long addr, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
-extern int pmdp_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp,
- pmd_t entry, int dirty);
-
-#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp);
-
#define __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
@@ -638,208 +622,6 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
return pmd;
}
-static inline pgste_t pgste_get_lock(pte_t *ptep)
-{
- unsigned long new = 0;
-#ifdef CONFIG_PGSTE
- unsigned long old;
-
- preempt_disable();
- asm(
- " lg %0,%2\n"
- "0: lgr %1,%0\n"
- " nihh %0,0xff7f\n" /* clear PCL bit in old */
- " oihh %1,0x0080\n" /* set PCL bit in new */
- " csg %0,%1,%2\n"
- " jl 0b\n"
- : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
- : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
-#endif
- return __pgste(new);
-}
-
-static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
- asm(
- " nihh %1,0xff7f\n" /* clear PCL bit */
- " stg %1,%0\n"
- : "=Q" (ptep[PTRS_PER_PTE])
- : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
- : "cc", "memory");
- preempt_enable();
-#endif
-}
-
-static inline pgste_t pgste_get(pte_t *ptep)
-{
- unsigned long pgste = 0;
-#ifdef CONFIG_PGSTE
- pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
-#endif
- return __pgste(pgste);
-}
-
-static inline void pgste_set(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
- *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
-#endif
-}
-
-static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
- struct mm_struct *mm)
-{
-#ifdef CONFIG_PGSTE
- unsigned long address, bits, skey;
-
- if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
- return pgste;
- address = pte_val(*ptep) & PAGE_MASK;
- skey = (unsigned long) page_get_storage_key(address);
- bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
- /* Transfer page changed & referenced bit to guest bits in pgste */
- pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
- /* Copy page access key and fetch protection bit to pgste */
- pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
- pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
-#endif
- return pgste;
-
-}
-
-static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
- struct mm_struct *mm)
-{
-#ifdef CONFIG_PGSTE
- unsigned long address;
- unsigned long nkey;
-
- if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
- return;
- VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
- address = pte_val(entry) & PAGE_MASK;
- /*
- * Set page access key and fetch protection bit from pgste.
- * The guest C/R information is still in the PGSTE, set real
- * key C/R to 0.
- */
- nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
- nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
- page_set_storage_key(address, nkey, 0);
-#endif
-}
-
-static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
-{
- if ((pte_val(entry) & _PAGE_PRESENT) &&
- (pte_val(entry) & _PAGE_WRITE) &&
- !(pte_val(entry) & _PAGE_INVALID)) {
- if (!MACHINE_HAS_ESOP) {
- /*
- * Without enhanced suppression-on-protection force
- * the dirty bit on for all writable ptes.
- */
- pte_val(entry) |= _PAGE_DIRTY;
- pte_val(entry) &= ~_PAGE_PROTECT;
- }
- if (!(pte_val(entry) & _PAGE_PROTECT))
- /* This pte allows write access, set user-dirty */
- pgste_val(pgste) |= PGSTE_UC_BIT;
- }
- *ptep = entry;
- return pgste;
-}
-
-/**
- * struct gmap_struct - guest address space
- * @crst_list: list of all crst tables used in the guest address space
- * @mm: pointer to the parent mm_struct
- * @guest_to_host: radix tree with guest to host address translation
- * @host_to_guest: radix tree with pointer to segment table entries
- * @guest_table_lock: spinlock to protect all entries in the guest page table
- * @table: pointer to the page directory
- * @asce: address space control element for gmap page table
- * @pfault_enabled: defines if pfaults are applicable for the guest
- */
-struct gmap {
- struct list_head list;
- struct list_head crst_list;
- struct mm_struct *mm;
- struct radix_tree_root guest_to_host;
- struct radix_tree_root host_to_guest;
- spinlock_t guest_table_lock;
- unsigned long *table;
- unsigned long asce;
- unsigned long asce_end;
- void *private;
- bool pfault_enabled;
-};
-
-/**
- * struct gmap_notifier - notify function block for page invalidation
- * @notifier_call: address of callback function
- */
-struct gmap_notifier {
- struct list_head list;
- void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
-};
-
-struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
-void gmap_free(struct gmap *gmap);
-void gmap_enable(struct gmap *gmap);
-void gmap_disable(struct gmap *gmap);
-int gmap_map_segment(struct gmap *gmap, unsigned long from,
- unsigned long to, unsigned long len);
-int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
-unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
-unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
-int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
-void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
-void __gmap_zap(struct gmap *, unsigned long gaddr);
-bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
-
-
-void gmap_register_ipte_notifier(struct gmap_notifier *);
-void gmap_unregister_ipte_notifier(struct gmap_notifier *);
-int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
-void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
-
-static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
- unsigned long addr,
- pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
- if (pgste_val(pgste) & PGSTE_IN_BIT) {
- pgste_val(pgste) &= ~PGSTE_IN_BIT;
- gmap_do_ipte_notify(mm, addr, ptep);
- }
-#endif
- return pgste;
-}
-
-/*
- * Certain architectures need to do special things when PTEs
- * within a page table are directly modified. Thus, the following
- * hook is made available.
- */
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t entry)
-{
- pgste_t pgste;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
- pgste_set_key(ptep, pgste, entry, mm);
- pgste = pgste_set_pte(ptep, pgste, entry);
- pgste_set_unlock(ptep, pgste);
- } else {
- *ptep = entry;
- }
-}
-
/*
* query functions pte_write/pte_dirty/pte_young only work if
* pte_present() is true. Undefined behaviour if not..
@@ -1005,96 +787,30 @@ static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep)
} while (nr != 255);
}
-static inline void ptep_flush_direct(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
-{
- int active, count;
-
- if (pte_val(*ptep) & _PAGE_INVALID)
- return;
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
- cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __ptep_ipte_local(address, ptep);
- else
- __ptep_ipte(address, ptep);
- atomic_sub(0x10000, &mm->context.attach_count);
-}
-
-static inline void ptep_flush_lazy(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
-{
- int active, count;
-
- if (pte_val(*ptep) & _PAGE_INVALID)
- return;
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if ((count & 0xffff) <= active) {
- pte_val(*ptep) |= _PAGE_INVALID;
- mm->context.flush_mm = 1;
- } else
- __ptep_ipte(address, ptep);
- atomic_sub(0x10000, &mm->context.attach_count);
-}
-
/*
- * Get (and clear) the user dirty bit for a pte.
+ * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
+ * both clear the TLB for the unmapped pte. The reason is that
+ * ptep_get_and_clear is used in common code (e.g. change_pte_range)
+ * to modify an active pte. The sequence is
+ * 1) ptep_get_and_clear
+ * 2) set_pte_at
+ * 3) flush_tlb_range
+ * On s390 the tlb needs to get flushed with the modification of the pte
+ * if the pte is active. The only way how this can be implemented is to
+ * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
+ * is a nop.
*/
-static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
- unsigned long addr,
- pte_t *ptep)
-{
- pgste_t pgste;
- pte_t pte;
- int dirty;
-
- if (!mm_has_pgste(mm))
- return 0;
- pgste = pgste_get_lock(ptep);
- dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
- pgste_val(pgste) &= ~PGSTE_UC_BIT;
- pte = *ptep;
- if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
- pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
- __ptep_ipte(addr, ptep);
- if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
- pte_val(pte) |= _PAGE_PROTECT;
- else
- pte_val(pte) |= _PAGE_INVALID;
- *ptep = pte;
- }
- pgste_set_unlock(ptep, pgste);
- return dirty;
-}
+pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t);
+pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- pgste_t pgste;
- pte_t pte, oldpte;
- int young;
-
- if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
- }
-
- oldpte = pte = *ptep;
- ptep_flush_direct(vma->vm_mm, addr, ptep);
- young = pte_young(pte);
- pte = pte_mkold(pte);
-
- if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm);
- pgste = pgste_set_pte(ptep, pgste, pte);
- pgste_set_unlock(ptep, pgste);
- } else
- *ptep = pte;
+ pte_t pte = *ptep;
- return young;
+ pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte));
+ return pte_young(pte);
}
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
@@ -1104,104 +820,22 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
return ptep_test_and_clear_young(vma, address, ptep);
}
-/*
- * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
- * both clear the TLB for the unmapped pte. The reason is that
- * ptep_get_and_clear is used in common code (e.g. change_pte_range)
- * to modify an active pte. The sequence is
- * 1) ptep_get_and_clear
- * 2) set_pte_at
- * 3) flush_tlb_range
- * On s390 the tlb needs to get flushed with the modification of the pte
- * if the pte is active. The only way how this can be implemented is to
- * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
- * is a nop.
- */
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
+ unsigned long addr, pte_t *ptep)
{
- pgste_t pgste;
- pte_t pte;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
- }
-
- pte = *ptep;
- ptep_flush_lazy(mm, address, ptep);
- pte_val(*ptep) = _PAGE_INVALID;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_update_all(&pte, pgste, mm);
- pgste_set_unlock(ptep, pgste);
- }
- return pte;
+ return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
- unsigned long address,
- pte_t *ptep)
-{
- pgste_t pgste;
- pte_t pte;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste_ipte_notify(mm, address, ptep, pgste);
- }
-
- pte = *ptep;
- ptep_flush_lazy(mm, address, ptep);
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_update_all(&pte, pgste, mm);
- pgste_set(ptep, pgste);
- }
- return pte;
-}
-
-static inline void ptep_modify_prot_commit(struct mm_struct *mm,
- unsigned long address,
- pte_t *ptep, pte_t pte)
-{
- pgste_t pgste;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_get(ptep);
- pgste_set_key(ptep, pgste, pte, mm);
- pgste = pgste_set_pte(ptep, pgste, pte);
- pgste_set_unlock(ptep, pgste);
- } else
- *ptep = pte;
-}
+pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t);
#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)
+ unsigned long addr, pte_t *ptep)
{
- pgste_t pgste;
- pte_t pte;
-
- if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
- }
-
- pte = *ptep;
- ptep_flush_direct(vma->vm_mm, address, ptep);
- pte_val(*ptep) = _PAGE_INVALID;
-
- if (mm_has_pgste(vma->vm_mm)) {
- if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
- _PGSTE_GPS_USAGE_UNUSED)
- pte_val(pte) |= _PAGE_UNUSED;
- pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
- pgste_set_unlock(ptep, pgste);
- }
- return pte;
+ return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
}
/*
@@ -1213,80 +847,66 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
*/
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
- unsigned long address,
+ unsigned long addr,
pte_t *ptep, int full)
{
- pgste_t pgste;
- pte_t pte;
-
- if (!full && mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
- }
-
- pte = *ptep;
- if (!full)
- ptep_flush_lazy(mm, address, ptep);
- pte_val(*ptep) = _PAGE_INVALID;
-
- if (!full && mm_has_pgste(mm)) {
- pgste = pgste_update_all(&pte, pgste, mm);
- pgste_set_unlock(ptep, pgste);
+ if (full) {
+ pte_t pte = *ptep;
+ *ptep = __pte(_PAGE_INVALID);
+ return pte;
}
- return pte;
+ return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
- pgste_t pgste;
pte_t pte = *ptep;
- if (pte_write(pte)) {
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
- }
-
- ptep_flush_lazy(mm, address, ptep);
- pte = pte_wrprotect(pte);
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_set_pte(ptep, pgste, pte);
- pgste_set_unlock(ptep, pgste);
- } else
- *ptep = pte;
- }
- return pte;
+ if (pte_write(pte))
+ ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
}
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep,
+ unsigned long addr, pte_t *ptep,
pte_t entry, int dirty)
{
- pgste_t pgste;
- pte_t oldpte;
-
- oldpte = *ptep;
- if (pte_same(oldpte, entry))
+ if (pte_same(*ptep, entry))
return 0;
- if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
- }
+ ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
+ return 1;
+}
- ptep_flush_direct(vma->vm_mm, address, ptep);
+/*
+ * Additional functions to handle KVM guest page tables
+ */
+void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry);
+void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep , int reset);
+void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+
+bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char key, bool nq);
+unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
- if (mm_has_pgste(vma->vm_mm)) {
- if (pte_val(oldpte) & _PAGE_INVALID)
- pgste_set_key(ptep, pgste, entry, vma->vm_mm);
- pgste = pgste_set_pte(ptep, pgste, entry);
- pgste_set_unlock(ptep, pgste);
- } else
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+ */
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry)
+{
+ if (mm_has_pgste(mm))
+ ptep_set_pte_at(mm, addr, ptep, entry);
+ else
*ptep = entry;
- return 1;
}
/*
@@ -1424,8 +1044,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
if (pmd_large(pmd)) {
pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
- _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT |
- _SEGMENT_ENTRY_SOFT_DIRTY;
+ _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY;
pmd_val(pmd) |= massage_pgprot_pmd(newprot);
if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
@@ -1484,59 +1103,50 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
: "cc" );
}
-static inline void pmdp_flush_direct(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
-{
- int active, count;
+pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
+pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
- if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
- return;
- if (!MACHINE_HAS_IDTE) {
- __pmdp_csp(pmdp);
- return;
- }
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
- cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __pmdp_idte_local(address, pmdp);
- else
- __pmdp_idte(address, pmdp);
- atomic_sub(0x10000, &mm->context.attach_count);
-}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
-static inline void pmdp_flush_lazy(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp,
+ pmd_t entry, int dirty)
{
- int active, count;
+ VM_BUG_ON(addr & ~HPAGE_MASK);
- if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
- return;
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if ((count & 0xffff) <= active) {
- pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
- mm->context.flush_mm = 1;
- } else if (MACHINE_HAS_IDTE)
- __pmdp_idte(address, pmdp);
- else
- __pmdp_csp(pmdp);
- atomic_sub(0x10000, &mm->context.attach_count);
+ entry = pmd_mkyoung(entry);
+ if (dirty)
+ entry = pmd_mkdirty(entry);
+ if (pmd_val(*pmdp) == pmd_val(entry))
+ return 0;
+ pmdp_xchg_direct(vma->vm_mm, addr, pmdp, entry);
+ return 1;
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable);
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t pmd = *pmdp;
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+ pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd_mkold(pmd));
+ return pmd_young(pmd);
+}
-static inline int pmd_trans_splitting(pmd_t pmd)
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
{
- return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) &&
- (pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT);
+ VM_BUG_ON(addr & ~HPAGE_MASK);
+ return pmdp_test_and_clear_young(vma, addr, pmdp);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
@@ -1553,66 +1163,48 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
return pmd;
}
-#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
-{
- pmd_t pmd;
-
- pmd = *pmdp;
- pmdp_flush_direct(vma->vm_mm, address, pmdp);
- *pmdp = pmd_mkold(pmd);
- return pmd_young(pmd);
-}
-
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
- pmd_t pmd = *pmdp;
-
- pmdp_flush_direct(mm, address, pmdp);
- pmd_clear(pmdp);
- return pmd;
+ return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
}
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
- unsigned long address,
+ unsigned long addr,
pmd_t *pmdp, int full)
{
- pmd_t pmd = *pmdp;
-
- if (!full)
- pmdp_flush_lazy(mm, address, pmdp);
- pmd_clear(pmdp);
- return pmd;
+ if (full) {
+ pmd_t pmd = *pmdp;
+ *pmdp = __pmd(_SEGMENT_ENTRY_INVALID);
+ return pmd;
+ }
+ return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
}
#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
- return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+ return pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
}
#define __HAVE_ARCH_PMDP_INVALIDATE
static inline void pmdp_invalidate(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
- pmdp_flush_direct(vma->vm_mm, address, pmdp);
+ pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
pmd_t pmd = *pmdp;
- if (pmd_write(pmd)) {
- pmdp_flush_direct(mm, address, pmdp);
- set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd));
- }
+ if (pmd_write(pmd))
+ pmd = pmdp_xchg_lazy(mm, addr, pmdp, pmd_wrprotect(pmd));
}
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index b16c3d0a1b9f..d6fd22ea270d 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -18,12 +18,14 @@
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define CIF_FPU 3 /* restore FPU registers */
#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */
+#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
#define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING)
#define _CIF_ASCE _BITUL(CIF_ASCE)
#define _CIF_NOHZ_DELAY _BITUL(CIF_NOHZ_DELAY)
#define _CIF_FPU _BITUL(CIF_FPU)
#define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ)
+#define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT)
#ifndef __ASSEMBLY__
@@ -52,6 +54,16 @@ static inline int test_cpu_flag(int flag)
return !!(S390_lowcore.cpu_flags & (1UL << flag));
}
+/*
+ * Test CIF flag of another CPU. The caller needs to ensure that
+ * CPU hotplug can not happen, e.g. by disabling preemption.
+ */
+static inline int test_cpu_flag_of(int flag, int cpu)
+{
+ struct lowcore *lc = lowcore_ptr[cpu];
+ return !!(lc->cpu_flags & (1UL << flag));
+}
+
#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
/*
@@ -154,14 +166,14 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS];
*/
#define start_thread(regs, new_psw, new_stackp) do { \
regs->psw.mask = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA; \
- regs->psw.addr = new_psw | PSW_ADDR_AMODE; \
+ regs->psw.addr = new_psw; \
regs->gprs[15] = new_stackp; \
execve_tail(); \
} while (0)
#define start_thread31(regs, new_psw, new_stackp) do { \
regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \
- regs->psw.addr = new_psw | PSW_ADDR_AMODE; \
+ regs->psw.addr = new_psw; \
regs->gprs[15] = new_stackp; \
crst_table_downgrade(current->mm, 1UL << 31); \
execve_tail(); \
@@ -172,6 +184,10 @@ struct task_struct;
struct mm_struct;
struct seq_file;
+typedef int (*dump_trace_func_t)(void *data, unsigned long address);
+void dump_trace(dump_trace_func_t func, void *data,
+ struct task_struct *task, unsigned long sp);
+
void show_cacheinfo(struct seq_file *m);
/* Free all resources held by a thread. */
@@ -191,6 +207,14 @@ unsigned long get_wchan(struct task_struct *p);
/* Has task runtime instrumentation enabled ? */
#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
+static inline unsigned long current_stack_pointer(void)
+{
+ unsigned long sp;
+
+ asm volatile("la %0,0(15)" : "=a" (sp));
+ return sp;
+}
+
static inline unsigned short stap(void)
{
unsigned short cpu_address;
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 37cbc50947f2..99bc456cc26a 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -24,25 +24,25 @@
PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
struct psw_bits {
- unsigned long long : 1;
- unsigned long long r : 1; /* PER-Mask */
- unsigned long long : 3;
- unsigned long long t : 1; /* DAT Mode */
- unsigned long long i : 1; /* Input/Output Mask */
- unsigned long long e : 1; /* External Mask */
- unsigned long long key : 4; /* PSW Key */
- unsigned long long : 1;
- unsigned long long m : 1; /* Machine-Check Mask */
- unsigned long long w : 1; /* Wait State */
- unsigned long long p : 1; /* Problem State */
- unsigned long long as : 2; /* Address Space Control */
- unsigned long long cc : 2; /* Condition Code */
- unsigned long long pm : 4; /* Program Mask */
- unsigned long long ri : 1; /* Runtime Instrumentation */
- unsigned long long : 6;
- unsigned long long eaba : 2; /* Addressing Mode */
- unsigned long long : 31;
- unsigned long long ia : 64;/* Instruction Address */
+ unsigned long : 1;
+ unsigned long r : 1; /* PER-Mask */
+ unsigned long : 3;
+ unsigned long t : 1; /* DAT Mode */
+ unsigned long i : 1; /* Input/Output Mask */
+ unsigned long e : 1; /* External Mask */
+ unsigned long key : 4; /* PSW Key */
+ unsigned long : 1;
+ unsigned long m : 1; /* Machine-Check Mask */
+ unsigned long w : 1; /* Wait State */
+ unsigned long p : 1; /* Problem State */
+ unsigned long as : 2; /* Address Space Control */
+ unsigned long cc : 2; /* Condition Code */
+ unsigned long pm : 4; /* Program Mask */
+ unsigned long ri : 1; /* Runtime Instrumentation */
+ unsigned long : 6;
+ unsigned long eaba : 2; /* Addressing Mode */
+ unsigned long : 31;
+ unsigned long ia : 64; /* Instruction Address */
};
enum {
@@ -149,7 +149,7 @@ static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
#define arch_has_block_step() (1)
#define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
-#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define instruction_pointer(regs) ((regs)->psw.addr)
#define user_stack_pointer(regs)((regs)->gprs[15])
#define profile_pc(regs) instruction_pointer(regs)
@@ -161,7 +161,7 @@ static inline long regs_return_value(struct pt_regs *regs)
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
- regs->psw.addr = val | PSW_ADDR_AMODE;
+ regs->psw.addr = val;
}
int regs_query_register_offset(const char *name);
@@ -171,7 +171,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
- return regs->gprs[15] & PSW_ADDR_INSN;
+ return regs->gprs[15];
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h
index 72786067b300..fe11fa88a0e0 100644
--- a/arch/s390/include/asm/reset.h
+++ b/arch/s390/include/asm/reset.h
@@ -15,6 +15,5 @@ struct reset_call {
extern void register_reset_call(struct reset_call *reset);
extern void unregister_reset_call(struct reset_call *reset);
-extern void s390_reset_system(void (*fn_pre)(void),
- void (*fn_post)(void *), void *data);
+extern void s390_reset_system(void);
#endif /* _ASM_S390_RESET_H */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 4b43ee7e6776..fead491dfc28 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -31,7 +31,7 @@
* This should be totally fair - if anything is waiting, a process that wants a
* lock will go to the back of the queue. When the currently active lock is
* released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consequtive readers at the
+ * that will be woken up; if there's a bunch of consecutive readers at the
* front, then they'll all be woken up, but no other readers will be.
*/
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 821dde5f425d..bab456be9a4f 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -29,7 +29,10 @@ struct sclp_ipl_info {
struct sclp_core_entry {
u8 core_id;
- u8 reserved0[2];
+ u8 reserved0;
+ u8 : 4;
+ u8 sief2 : 1;
+ u8 : 3;
u8 : 3;
u8 siif : 1;
u8 sigpif : 1;
@@ -53,16 +56,19 @@ struct sclp_info {
unsigned char has_sigpif : 1;
unsigned char has_core_type : 1;
unsigned char has_sprp : 1;
+ unsigned char has_hvs : 1;
+ unsigned char has_esca : 1;
+ unsigned char has_sief2 : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
unsigned int mtid_prev;
- unsigned long long rzm;
- unsigned long long rnmax;
- unsigned long long hamax;
+ unsigned long rzm;
+ unsigned long rnmax;
+ unsigned long hamax;
unsigned int max_cores;
unsigned long hsa_size;
- unsigned long long facilities;
+ unsigned long facilities;
};
extern struct sclp_info sclp;
@@ -77,8 +83,9 @@ int sclp_chp_read_info(struct sclp_chp_info *info);
void sclp_get_ipl_info(struct sclp_ipl_info *info);
int sclp_pci_configure(u32 fid);
int sclp_pci_deconfigure(u32 fid);
-int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
+int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
+int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
void sclp_early_detect(void);
-int _sclp_print_early(const char *);
+void _sclp_print_early(const char *);
#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 23537661da0e..c0f0efbb6ab5 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -12,27 +12,24 @@
#define PARMAREA 0x10400
/*
- * Machine features detected in head.S
+ * Machine features detected in early.c
*/
#define MACHINE_FLAG_VM _BITUL(0)
-#define MACHINE_FLAG_IEEE _BITUL(1)
-#define MACHINE_FLAG_CSP _BITUL(2)
-#define MACHINE_FLAG_MVPG _BITUL(3)
-#define MACHINE_FLAG_DIAG44 _BITUL(4)
+#define MACHINE_FLAG_KVM _BITUL(1)
+#define MACHINE_FLAG_LPAR _BITUL(2)
+#define MACHINE_FLAG_DIAG9C _BITUL(3)
+#define MACHINE_FLAG_ESOP _BITUL(4)
#define MACHINE_FLAG_IDTE _BITUL(5)
-#define MACHINE_FLAG_DIAG9C _BITUL(6)
-#define MACHINE_FLAG_KVM _BITUL(8)
-#define MACHINE_FLAG_ESOP _BITUL(9)
-#define MACHINE_FLAG_EDAT1 _BITUL(10)
-#define MACHINE_FLAG_EDAT2 _BITUL(11)
-#define MACHINE_FLAG_LPAR _BITUL(12)
-#define MACHINE_FLAG_LPP _BITUL(13)
-#define MACHINE_FLAG_TOPOLOGY _BITUL(14)
-#define MACHINE_FLAG_TE _BITUL(15)
-#define MACHINE_FLAG_TLB_LC _BITUL(17)
-#define MACHINE_FLAG_VX _BITUL(18)
-#define MACHINE_FLAG_CAD _BITUL(19)
+#define MACHINE_FLAG_DIAG44 _BITUL(6)
+#define MACHINE_FLAG_EDAT1 _BITUL(7)
+#define MACHINE_FLAG_EDAT2 _BITUL(8)
+#define MACHINE_FLAG_LPP _BITUL(9)
+#define MACHINE_FLAG_TOPOLOGY _BITUL(10)
+#define MACHINE_FLAG_TE _BITUL(11)
+#define MACHINE_FLAG_TLB_LC _BITUL(12)
+#define MACHINE_FLAG_VX _BITUL(13)
+#define MACHINE_FLAG_CAD _BITUL(14)
#define LPP_MAGIC _BITUL(31)
#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL)
@@ -104,6 +101,8 @@ extern void pfault_fini(void);
#define pfault_fini() do { } while (0)
#endif /* CONFIG_PFAULT */
+void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
+
extern void cmma_init(void);
extern void (*_machine_restart)(char *command);
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 5df26b11cf47..0cc383b9be7f 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -18,6 +18,7 @@
extern struct mutex smp_cpu_state_mutex;
extern unsigned int smp_cpu_mt_shift;
extern unsigned int smp_cpu_mtid;
+extern __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
@@ -55,7 +56,6 @@ static inline int smp_store_status(int cpu) { return 0; }
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
static inline void smp_fill_possible_mask(void) { }
-static inline void smp_save_dump_cpus(void) { }
#endif /* CONFIG_SMP */
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index f7054a892d9e..2728114d5484 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -56,7 +56,12 @@ struct sysinfo_1_2_2 {
char format;
char reserved_0[1];
unsigned short acc_offset;
- char reserved_1[20];
+ unsigned char mt_installed :1;
+ unsigned char :2;
+ unsigned char mt_stid :5;
+ unsigned char :3;
+ unsigned char mt_gtid :5;
+ char reserved_1[18];
unsigned int nominal_cap;
unsigned int secondary_cap;
unsigned int capability;
@@ -92,9 +97,13 @@ struct sysinfo_2_2_2 {
char name[8];
unsigned int caf;
char reserved_2[8];
- unsigned char mt_installed;
- unsigned char mt_general;
- unsigned char mt_psmtid;
+ unsigned char mt_installed :1;
+ unsigned char :2;
+ unsigned char mt_stid :5;
+ unsigned char :3;
+ unsigned char mt_gtid :5;
+ unsigned char :3;
+ unsigned char mt_psmtid :5;
char reserved_3[5];
unsigned short cpus_dedicated;
unsigned short cpus_shared;
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 692b9247c019..2fffc2c27581 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -96,6 +96,4 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_31BIT _BITUL(TIF_31BIT)
#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
-#define is_32bit_task() (test_thread_flag(TIF_31BIT))
-
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 94fc55fc72ce..6b53962e807e 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -7,7 +7,7 @@
struct sysinfo_15_1_x;
struct cpu;
-#ifdef CONFIG_SCHED_BOOK
+#ifdef CONFIG_SCHED_TOPOLOGY
struct cpu_topology_s390 {
unsigned short thread_id;
@@ -40,13 +40,13 @@ void store_topology(struct sysinfo_15_1_x *info);
void topology_expect_change(void);
const struct cpumask *cpu_coregroup_mask(int cpu);
-#else /* CONFIG_SCHED_BOOK */
+#else /* CONFIG_SCHED_TOPOLOGY */
static inline void topology_schedule_update(void) { }
static inline int topology_cpu_init(struct cpu *cpu) { return 0; }
static inline void topology_expect_change(void) { }
-#endif /* CONFIG_SCHED_BOOK */
+#endif /* CONFIG_SCHED_TOPOLOGY */
#define POLARIZATION_UNKNOWN (-1)
#define POLARIZATION_HRZ (0)
diff --git a/arch/s390/include/asm/trace/diag.h b/arch/s390/include/asm/trace/diag.h
index 776f307960cc..cc6cfe7889da 100644
--- a/arch/s390/include/asm/trace/diag.h
+++ b/arch/s390/include/asm/trace/diag.h
@@ -19,7 +19,7 @@
#define TRACE_INCLUDE_PATH asm/trace
#define TRACE_INCLUDE_FILE diag
-TRACE_EVENT(diagnose,
+TRACE_EVENT(s390_diagnose,
TP_PROTO(unsigned short nr),
TP_ARGS(nr),
TP_STRUCT__entry(
@@ -32,9 +32,9 @@ TRACE_EVENT(diagnose,
);
#ifdef CONFIG_TRACEPOINTS
-void trace_diagnose_norecursion(int diag_nr);
+void trace_s390_diagnose_norecursion(int diag_nr);
#else
-static inline void trace_diagnose_norecursion(int diag_nr) { }
+static inline void trace_s390_diagnose_norecursion(int diag_nr) { }
#endif
#endif /* _TRACE_S390_DIAG_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 9dd4cc47ddc7..e0900ddf91dd 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -79,18 +79,12 @@ struct exception_table_entry
int insn, fixup;
};
-static inline unsigned long extable_insn(const struct exception_table_entry *x)
-{
- return (unsigned long)&x->insn + x->insn;
-}
-
static inline unsigned long extable_fixup(const struct exception_table_entry *x)
{
return (unsigned long)&x->fixup + x->fixup;
}
-#define ARCH_HAS_SORT_EXTABLE
-#define ARCH_HAS_SEARCH_EXTABLE
+#define ARCH_HAS_RELATIVE_EXTABLE
/**
* __copy_from_user: - Copy a block of data from user space, with less checking.
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index 787acd4f9668..d0a2dbf2433d 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -38,12 +38,14 @@ struct vdso_data {
struct vdso_per_cpu_data {
__u64 ectg_timer_base;
__u64 ectg_user_time;
+ __u32 cpu_nr;
+ __u32 node_id;
};
extern struct vdso_data *vdso_data;
-int vdso_alloc_per_cpu(struct _lowcore *lowcore);
-void vdso_free_per_cpu(struct _lowcore *lowcore);
+int vdso_alloc_per_cpu(struct lowcore *lowcore);
+void vdso_free_per_cpu(struct lowcore *lowcore);
#endif /* __ASSEMBLY__ */
#endif /* __S390_VDSO_H__ */
diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h
index c82eb12a5b18..c988df744a70 100644
--- a/arch/s390/include/asm/xor.h
+++ b/arch/s390/include/asm/xor.h
@@ -1 +1,20 @@
-#include <asm-generic/xor.h>
+/*
+ * Optimited xor routines
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#ifndef _ASM_S390_XOR_H
+#define _ASM_S390_XOR_H
+
+extern struct xor_block_template xor_block_xc;
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+do { \
+ xor_speed(&xor_block_xc); \
+} while (0)
+
+#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_xc)
+
+#endif /* _ASM_S390_XOR_H */
diff --git a/arch/s390/include/uapi/asm/clp.h b/arch/s390/include/uapi/asm/clp.h
new file mode 100644
index 000000000000..ab72d9d24373
--- /dev/null
+++ b/arch/s390/include/uapi/asm/clp.h
@@ -0,0 +1,28 @@
+/*
+ * ioctl interface for /dev/clp
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_CLP_H
+#define _ASM_CLP_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+struct clp_req {
+ unsigned int c : 1;
+ unsigned int r : 1;
+ unsigned int lps : 6;
+ unsigned int cmd : 8;
+ unsigned int : 16;
+ unsigned int reserved;
+ __u64 data_p;
+};
+
+#define CLP_IOCTL_MAGIC 'c'
+
+#define CLP_SYNC _IOWR(CLP_IOCTL_MAGIC, 0xC1, struct clp_req)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index ef1a5fcc6c66..347fe5afa419 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -66,6 +66,8 @@ struct kvm_s390_io_adapter_req {
#define KVM_S390_VM_MEM_CLR_CMMA 1
#define KVM_S390_VM_MEM_LIMIT_SIZE 2
+#define KVM_S390_NO_MEM_LIMIT U64_MAX
+
/* kvm attributes for KVM_S390_VM_TOD */
#define KVM_S390_VM_TOD_LOW 0
#define KVM_S390_VM_TOD_HIGH 1
@@ -151,6 +153,8 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_ARCH0 (1UL << 4)
#define KVM_SYNC_PFAULT (1UL << 5)
#define KVM_SYNC_VRS (1UL << 6)
+#define KVM_SYNC_RICCB (1UL << 7)
+#define KVM_SYNC_FPRS (1UL << 8)
/* definition of registers in kvm_run */
struct kvm_sync_regs {
__u64 prefix; /* prefix register */
@@ -165,9 +169,14 @@ struct kvm_sync_regs {
__u64 pft; /* pfault token [PFAULT] */
__u64 pfs; /* pfault select [PFAULT] */
__u64 pfc; /* pfault compare [PFAULT] */
- __u64 vrs[32][2]; /* vector registers */
+ union {
+ __u64 vrs[32][2]; /* vector registers (KVM_SYNC_VRS) */
+ __u64 fprs[16]; /* fp registers (KVM_SYNC_FPRS) */
+ };
__u8 reserved[512]; /* for future vector expansion */
- __u32 fpc; /* only valid with vector registers */
+ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
+ __u8 padding[52]; /* riccb needs to be 64byte aligned */
+ __u8 riccb[64]; /* runtime instrumentation controls block */
};
#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index ee69c0854c88..5dbaa72baa64 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -7,6 +7,7 @@
{ 0x9c, "DIAG (0x9c) time slice end directed" }, \
{ 0x204, "DIAG (0x204) logical-cpu utilization" }, \
{ 0x258, "DIAG (0x258) page-reference services" }, \
+ { 0x288, "DIAG (0x288) watchdog functions" }, \
{ 0x308, "DIAG (0x308) ipl functions" }, \
{ 0x500, "DIAG (0x500) KVM virtio functions" }, \
{ 0x501, "DIAG (0x501) KVM breakpoint" }
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 296942d56e6a..41b51c2f4f1b 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -91,4 +91,9 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER
+#define SO_ATTACH_REUSEPORT_CBPF 51
+#define SO_ATTACH_REUSEPORT_EBPF 52
+
+#define SO_CNX_ADVICE 53
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index a848adba1504..4384bc797a54 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -192,14 +192,14 @@
#define __NR_set_tid_address 252
#define __NR_fadvise64 253
#define __NR_timer_create 254
-#define __NR_timer_settime (__NR_timer_create+1)
-#define __NR_timer_gettime (__NR_timer_create+2)
-#define __NR_timer_getoverrun (__NR_timer_create+3)
-#define __NR_timer_delete (__NR_timer_create+4)
-#define __NR_clock_settime (__NR_timer_create+5)
-#define __NR_clock_gettime (__NR_timer_create+6)
-#define __NR_clock_getres (__NR_timer_create+7)
-#define __NR_clock_nanosleep (__NR_timer_create+8)
+#define __NR_timer_settime 255
+#define __NR_timer_gettime 256
+#define __NR_timer_getoverrun 257
+#define __NR_timer_delete 258
+#define __NR_clock_settime 259
+#define __NR_clock_gettime 260
+#define __NR_clock_getres 261
+#define __NR_clock_nanosleep 262
/* Number 263 is reserved for vserver */
#define __NR_statfs64 265
#define __NR_fstatfs64 266
@@ -309,7 +309,11 @@
#define __NR_recvfrom 371
#define __NR_recvmsg 372
#define __NR_shutdown 373
-#define NR_syscalls 374
+#define __NR_mlock2 374
+#define __NR_copy_file_range 375
+#define __NR_preadv2 376
+#define __NR_pwritev2 377
+#define NR_syscalls 378
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index dc167a23b920..2f5586ab8a6a 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -34,8 +34,10 @@ CFLAGS_sysinfo.o += -w
#
CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE)
ifneq ($(CC_FLAGS_MARCH),-march=z900)
-CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH)
-CFLAGS_sclp.o += -march=z900
+CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH)
+CFLAGS_sclp.o += -march=z900
+AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH)
+AFLAGS_head.o += -march=z900
endif
GCOV_PROFILE_sclp.o := n
@@ -50,7 +52,7 @@ extra-y += head.o head64.o vmlinux.lds
obj-$(CONFIG_MODULES) += s390_ksyms.o module.o
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_SCHED_BOOK) += topology.o
+obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 9cd248f637c7..1f95cc1faeb7 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -12,6 +12,7 @@
#include <asm/idle.h>
#include <asm/vdso.h>
#include <asm/pgtable.h>
+#include <asm/gmap.h>
/*
* Make sure that the compiler is new enough. We want a compiler that
@@ -80,6 +81,8 @@ int main(void)
OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift);
OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
+ OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr);
+ OFFSET(__VDSO_NODE_ID, vdso_per_cpu_data, node_id);
BLANK();
/* constants used by the vdso */
DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
@@ -97,95 +100,96 @@ int main(void)
OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit);
BLANK();
/* hardware defined lowcore locations 0x000 - 0x1ff */
- OFFSET(__LC_EXT_PARAMS, _lowcore, ext_params);
- OFFSET(__LC_EXT_CPU_ADDR, _lowcore, ext_cpu_addr);
- OFFSET(__LC_EXT_INT_CODE, _lowcore, ext_int_code);
- OFFSET(__LC_SVC_ILC, _lowcore, svc_ilc);
- OFFSET(__LC_SVC_INT_CODE, _lowcore, svc_code);
- OFFSET(__LC_PGM_ILC, _lowcore, pgm_ilc);
- OFFSET(__LC_PGM_INT_CODE, _lowcore, pgm_code);
- OFFSET(__LC_DATA_EXC_CODE, _lowcore, data_exc_code);
- OFFSET(__LC_MON_CLASS_NR, _lowcore, mon_class_num);
- OFFSET(__LC_PER_CODE, _lowcore, per_code);
- OFFSET(__LC_PER_ATMID, _lowcore, per_atmid);
- OFFSET(__LC_PER_ADDRESS, _lowcore, per_address);
- OFFSET(__LC_EXC_ACCESS_ID, _lowcore, exc_access_id);
- OFFSET(__LC_PER_ACCESS_ID, _lowcore, per_access_id);
- OFFSET(__LC_OP_ACCESS_ID, _lowcore, op_access_id);
- OFFSET(__LC_AR_MODE_ID, _lowcore, ar_mode_id);
- OFFSET(__LC_TRANS_EXC_CODE, _lowcore, trans_exc_code);
- OFFSET(__LC_MON_CODE, _lowcore, monitor_code);
- OFFSET(__LC_SUBCHANNEL_ID, _lowcore, subchannel_id);
- OFFSET(__LC_SUBCHANNEL_NR, _lowcore, subchannel_nr);
- OFFSET(__LC_IO_INT_PARM, _lowcore, io_int_parm);
- OFFSET(__LC_IO_INT_WORD, _lowcore, io_int_word);
- OFFSET(__LC_STFL_FAC_LIST, _lowcore, stfl_fac_list);
- OFFSET(__LC_MCCK_CODE, _lowcore, mcck_interruption_code);
- OFFSET(__LC_MCCK_FAIL_STOR_ADDR, _lowcore, failing_storage_address);
- OFFSET(__LC_LAST_BREAK, _lowcore, breaking_event_addr);
- OFFSET(__LC_RST_OLD_PSW, _lowcore, restart_old_psw);
- OFFSET(__LC_EXT_OLD_PSW, _lowcore, external_old_psw);
- OFFSET(__LC_SVC_OLD_PSW, _lowcore, svc_old_psw);
- OFFSET(__LC_PGM_OLD_PSW, _lowcore, program_old_psw);
- OFFSET(__LC_MCK_OLD_PSW, _lowcore, mcck_old_psw);
- OFFSET(__LC_IO_OLD_PSW, _lowcore, io_old_psw);
- OFFSET(__LC_RST_NEW_PSW, _lowcore, restart_psw);
- OFFSET(__LC_EXT_NEW_PSW, _lowcore, external_new_psw);
- OFFSET(__LC_SVC_NEW_PSW, _lowcore, svc_new_psw);
- OFFSET(__LC_PGM_NEW_PSW, _lowcore, program_new_psw);
- OFFSET(__LC_MCK_NEW_PSW, _lowcore, mcck_new_psw);
- OFFSET(__LC_IO_NEW_PSW, _lowcore, io_new_psw);
+ OFFSET(__LC_EXT_PARAMS, lowcore, ext_params);
+ OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr);
+ OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code);
+ OFFSET(__LC_SVC_ILC, lowcore, svc_ilc);
+ OFFSET(__LC_SVC_INT_CODE, lowcore, svc_code);
+ OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc);
+ OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code);
+ OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code);
+ OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num);
+ OFFSET(__LC_PER_CODE, lowcore, per_code);
+ OFFSET(__LC_PER_ATMID, lowcore, per_atmid);
+ OFFSET(__LC_PER_ADDRESS, lowcore, per_address);
+ OFFSET(__LC_EXC_ACCESS_ID, lowcore, exc_access_id);
+ OFFSET(__LC_PER_ACCESS_ID, lowcore, per_access_id);
+ OFFSET(__LC_OP_ACCESS_ID, lowcore, op_access_id);
+ OFFSET(__LC_AR_MODE_ID, lowcore, ar_mode_id);
+ OFFSET(__LC_TRANS_EXC_CODE, lowcore, trans_exc_code);
+ OFFSET(__LC_MON_CODE, lowcore, monitor_code);
+ OFFSET(__LC_SUBCHANNEL_ID, lowcore, subchannel_id);
+ OFFSET(__LC_SUBCHANNEL_NR, lowcore, subchannel_nr);
+ OFFSET(__LC_IO_INT_PARM, lowcore, io_int_parm);
+ OFFSET(__LC_IO_INT_WORD, lowcore, io_int_word);
+ OFFSET(__LC_STFL_FAC_LIST, lowcore, stfl_fac_list);
+ OFFSET(__LC_STFLE_FAC_LIST, lowcore, stfle_fac_list);
+ OFFSET(__LC_MCCK_CODE, lowcore, mcck_interruption_code);
+ OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address);
+ OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr);
+ OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw);
+ OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw);
+ OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw);
+ OFFSET(__LC_PGM_OLD_PSW, lowcore, program_old_psw);
+ OFFSET(__LC_MCK_OLD_PSW, lowcore, mcck_old_psw);
+ OFFSET(__LC_IO_OLD_PSW, lowcore, io_old_psw);
+ OFFSET(__LC_RST_NEW_PSW, lowcore, restart_psw);
+ OFFSET(__LC_EXT_NEW_PSW, lowcore, external_new_psw);
+ OFFSET(__LC_SVC_NEW_PSW, lowcore, svc_new_psw);
+ OFFSET(__LC_PGM_NEW_PSW, lowcore, program_new_psw);
+ OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw);
+ OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw);
/* software defined lowcore locations 0x200 - 0xdff*/
- OFFSET(__LC_SAVE_AREA_SYNC, _lowcore, save_area_sync);
- OFFSET(__LC_SAVE_AREA_ASYNC, _lowcore, save_area_async);
- OFFSET(__LC_SAVE_AREA_RESTART, _lowcore, save_area_restart);
- OFFSET(__LC_CPU_FLAGS, _lowcore, cpu_flags);
- OFFSET(__LC_RETURN_PSW, _lowcore, return_psw);
- OFFSET(__LC_RETURN_MCCK_PSW, _lowcore, return_mcck_psw);
- OFFSET(__LC_SYNC_ENTER_TIMER, _lowcore, sync_enter_timer);
- OFFSET(__LC_ASYNC_ENTER_TIMER, _lowcore, async_enter_timer);
- OFFSET(__LC_MCCK_ENTER_TIMER, _lowcore, mcck_enter_timer);
- OFFSET(__LC_EXIT_TIMER, _lowcore, exit_timer);
- OFFSET(__LC_USER_TIMER, _lowcore, user_timer);
- OFFSET(__LC_SYSTEM_TIMER, _lowcore, system_timer);
- OFFSET(__LC_STEAL_TIMER, _lowcore, steal_timer);
- OFFSET(__LC_LAST_UPDATE_TIMER, _lowcore, last_update_timer);
- OFFSET(__LC_LAST_UPDATE_CLOCK, _lowcore, last_update_clock);
- OFFSET(__LC_INT_CLOCK, _lowcore, int_clock);
- OFFSET(__LC_MCCK_CLOCK, _lowcore, mcck_clock);
- OFFSET(__LC_CURRENT, _lowcore, current_task);
- OFFSET(__LC_THREAD_INFO, _lowcore, thread_info);
- OFFSET(__LC_KERNEL_STACK, _lowcore, kernel_stack);
- OFFSET(__LC_ASYNC_STACK, _lowcore, async_stack);
- OFFSET(__LC_PANIC_STACK, _lowcore, panic_stack);
- OFFSET(__LC_RESTART_STACK, _lowcore, restart_stack);
- OFFSET(__LC_RESTART_FN, _lowcore, restart_fn);
- OFFSET(__LC_RESTART_DATA, _lowcore, restart_data);
- OFFSET(__LC_RESTART_SOURCE, _lowcore, restart_source);
- OFFSET(__LC_USER_ASCE, _lowcore, user_asce);
- OFFSET(__LC_LPP, _lowcore, lpp);
- OFFSET(__LC_CURRENT_PID, _lowcore, current_pid);
- OFFSET(__LC_PERCPU_OFFSET, _lowcore, percpu_offset);
- OFFSET(__LC_VDSO_PER_CPU, _lowcore, vdso_per_cpu_data);
- OFFSET(__LC_MACHINE_FLAGS, _lowcore, machine_flags);
- OFFSET(__LC_GMAP, _lowcore, gmap);
- OFFSET(__LC_PASTE, _lowcore, paste);
+ OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync);
+ OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async);
+ OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
+ OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags);
+ OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
+ OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
+ OFFSET(__LC_SYNC_ENTER_TIMER, lowcore, sync_enter_timer);
+ OFFSET(__LC_ASYNC_ENTER_TIMER, lowcore, async_enter_timer);
+ OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer);
+ OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer);
+ OFFSET(__LC_USER_TIMER, lowcore, user_timer);
+ OFFSET(__LC_SYSTEM_TIMER, lowcore, system_timer);
+ OFFSET(__LC_STEAL_TIMER, lowcore, steal_timer);
+ OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer);
+ OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
+ OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
+ OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock);
+ OFFSET(__LC_CURRENT, lowcore, current_task);
+ OFFSET(__LC_THREAD_INFO, lowcore, thread_info);
+ OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
+ OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
+ OFFSET(__LC_PANIC_STACK, lowcore, panic_stack);
+ OFFSET(__LC_RESTART_STACK, lowcore, restart_stack);
+ OFFSET(__LC_RESTART_FN, lowcore, restart_fn);
+ OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
+ OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source);
+ OFFSET(__LC_USER_ASCE, lowcore, user_asce);
+ OFFSET(__LC_LPP, lowcore, lpp);
+ OFFSET(__LC_CURRENT_PID, lowcore, current_pid);
+ OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset);
+ OFFSET(__LC_VDSO_PER_CPU, lowcore, vdso_per_cpu_data);
+ OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags);
+ OFFSET(__LC_GMAP, lowcore, gmap);
+ OFFSET(__LC_PASTE, lowcore, paste);
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
- OFFSET(__LC_DUMP_REIPL, _lowcore, ipib);
+ OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
/* hardware defined lowcore locations 0x1000 - 0x18ff */
- OFFSET(__LC_VX_SAVE_AREA_ADDR, _lowcore, vector_save_area_addr);
- OFFSET(__LC_EXT_PARAMS2, _lowcore, ext_params2);
- OFFSET(SAVE_AREA_BASE, _lowcore, floating_pt_save_area);
- OFFSET(__LC_FPREGS_SAVE_AREA, _lowcore, floating_pt_save_area);
- OFFSET(__LC_GPREGS_SAVE_AREA, _lowcore, gpregs_save_area);
- OFFSET(__LC_PSW_SAVE_AREA, _lowcore, psw_save_area);
- OFFSET(__LC_PREFIX_SAVE_AREA, _lowcore, prefixreg_save_area);
- OFFSET(__LC_FP_CREG_SAVE_AREA, _lowcore, fpt_creg_save_area);
- OFFSET(__LC_CPU_TIMER_SAVE_AREA, _lowcore, cpu_timer_save_area);
- OFFSET(__LC_CLOCK_COMP_SAVE_AREA, _lowcore, clock_comp_save_area);
- OFFSET(__LC_AREGS_SAVE_AREA, _lowcore, access_regs_save_area);
- OFFSET(__LC_CREGS_SAVE_AREA, _lowcore, cregs_save_area);
- OFFSET(__LC_PGM_TDB, _lowcore, pgm_tdb);
+ OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr);
+ OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
+ OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area);
+ OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area);
+ OFFSET(__LC_PSW_SAVE_AREA, lowcore, psw_save_area);
+ OFFSET(__LC_PREFIX_SAVE_AREA, lowcore, prefixreg_save_area);
+ OFFSET(__LC_FP_CREG_SAVE_AREA, lowcore, fpt_creg_save_area);
+ OFFSET(__LC_TOD_PROGREG_SAVE_AREA, lowcore, tod_progreg_save_area);
+ OFFSET(__LC_CPU_TIMER_SAVE_AREA, lowcore, cpu_timer_save_area);
+ OFFSET(__LC_CLOCK_COMP_SAVE_AREA, lowcore, clock_comp_save_area);
+ OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area);
+ OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area);
+ OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb);
BLANK();
/* gmap/sie offsets */
OFFSET(__GMAP_ASCE, gmap, asce);
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 66c94417c0ba..4af60374eba0 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -271,7 +271,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
/* Restore high gprs from signal stack */
if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
- sizeof(&sregs_ext->gprs_high)))
+ sizeof(sregs_ext->gprs_high)))
return -EFAULT;
for (i = 0; i < NUM_GPRS; i++)
*(__u32 *)&regs->gprs[i] = gprs_high[i];
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index 09f194052df3..ae2cda5eee5a 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -176,3 +176,5 @@ COMPAT_SYSCALL_WRAP4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
+COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 7f768914fb4f..7f48e568ac64 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -96,8 +96,7 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
(((unsigned long)response + rlen) >> 31)) {
lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA);
if (!lowbuf) {
- pr_warning("The cpcmd kernel function failed to "
- "allocate a response buffer\n");
+ pr_warn("The cpcmd kernel function failed to allocate a response buffer\n");
return -ENOMEM;
}
spin_lock_irqsave(&cpcmd_lock, flags);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 171e09bb8ea2..3986c9f62191 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/elf.h>
+#include <asm/asm-offsets.h>
#include <linux/memblock.h>
#include <asm/os_info.h>
#include <asm/elf.h>
@@ -32,7 +33,82 @@ static struct memblock_type oldmem_type = {
.regions = &oldmem_region,
};
-struct dump_save_areas dump_save_areas;
+struct save_area {
+ struct list_head list;
+ u64 psw[2];
+ u64 ctrs[16];
+ u64 gprs[16];
+ u32 acrs[16];
+ u64 fprs[16];
+ u32 fpc;
+ u32 prefix;
+ u64 todpreg;
+ u64 timer;
+ u64 todcmp;
+ u64 vxrs_low[16];
+ __vector128 vxrs_high[16];
+};
+
+static LIST_HEAD(dump_save_areas);
+
+/*
+ * Allocate a save area
+ */
+struct save_area * __init save_area_alloc(bool is_boot_cpu)
+{
+ struct save_area *sa;
+
+ sa = (void *) memblock_alloc(sizeof(*sa), 8);
+ if (is_boot_cpu)
+ list_add(&sa->list, &dump_save_areas);
+ else
+ list_add_tail(&sa->list, &dump_save_areas);
+ return sa;
+}
+
+/*
+ * Return the address of the save area for the boot CPU
+ */
+struct save_area * __init save_area_boot_cpu(void)
+{
+ if (list_empty(&dump_save_areas))
+ return NULL;
+ return list_first_entry(&dump_save_areas, struct save_area, list);
+}
+
+/*
+ * Copy CPU registers into the save area
+ */
+void __init save_area_add_regs(struct save_area *sa, void *regs)
+{
+ struct lowcore *lc;
+
+ lc = (struct lowcore *)(regs - __LC_FPREGS_SAVE_AREA);
+ memcpy(&sa->psw, &lc->psw_save_area, sizeof(sa->psw));
+ memcpy(&sa->ctrs, &lc->cregs_save_area, sizeof(sa->ctrs));
+ memcpy(&sa->gprs, &lc->gpregs_save_area, sizeof(sa->gprs));
+ memcpy(&sa->acrs, &lc->access_regs_save_area, sizeof(sa->acrs));
+ memcpy(&sa->fprs, &lc->floating_pt_save_area, sizeof(sa->fprs));
+ memcpy(&sa->fpc, &lc->fpt_creg_save_area, sizeof(sa->fpc));
+ memcpy(&sa->prefix, &lc->prefixreg_save_area, sizeof(sa->prefix));
+ memcpy(&sa->todpreg, &lc->tod_progreg_save_area, sizeof(sa->todpreg));
+ memcpy(&sa->timer, &lc->cpu_timer_save_area, sizeof(sa->timer));
+ memcpy(&sa->todcmp, &lc->clock_comp_save_area, sizeof(sa->todcmp));
+}
+
+/*
+ * Copy vector registers into the save area
+ */
+void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
+{
+ int i;
+
+ /* Copy lower halves of vector registers 0-15 */
+ for (i = 0; i < 16; i++)
+ memcpy(&sa->vxrs_low[i], &vxrs[i].u[2], 8);
+ /* Copy vector registers 16-31 */
+ memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
+}
/*
* Return physical address for virtual address
@@ -51,79 +127,85 @@ static inline void *load_real_addr(void *addr)
}
/*
- * Copy real to virtual or real memory
- */
-static int copy_from_realmem(void *dest, void *src, size_t count)
-{
- unsigned long size;
-
- if (!count)
- return 0;
- if (!is_vmalloc_or_module_addr(dest))
- return memcpy_real(dest, src, count);
- do {
- size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK));
- if (memcpy_real(load_real_addr(dest), src, size))
- return -EFAULT;
- count -= size;
- dest += size;
- src += size;
- } while (count);
- return 0;
-}
-
-/*
- * Pointer to ELF header in new kernel
+ * Copy memory of the old, dumped system to a kernel space virtual address
*/
-static void *elfcorehdr_newmem;
-
-/*
- * Copy one page from zfcpdump "oldmem"
- *
- * For pages below HSA size memory from the HSA is copied. Otherwise
- * real memory copy is used.
- */
-static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize,
- unsigned long src, int userbuf)
+int copy_oldmem_kernel(void *dst, void *src, size_t count)
{
+ unsigned long from, len;
+ void *ra;
int rc;
- if (src < sclp.hsa_size) {
- rc = memcpy_hsa(buf, src, csize, userbuf);
- } else {
- if (userbuf)
- rc = copy_to_user_real((void __force __user *) buf,
- (void *) src, csize);
- else
- rc = memcpy_real(buf, (void *) src, csize);
+ while (count) {
+ from = __pa(src);
+ if (!OLDMEM_BASE && from < sclp.hsa_size) {
+ /* Copy from zfcpdump HSA area */
+ len = min(count, sclp.hsa_size - from);
+ rc = memcpy_hsa_kernel(dst, from, len);
+ if (rc)
+ return rc;
+ } else {
+ /* Check for swapped kdump oldmem areas */
+ if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) {
+ from -= OLDMEM_BASE;
+ len = min(count, OLDMEM_SIZE - from);
+ } else if (OLDMEM_BASE && from < OLDMEM_SIZE) {
+ len = min(count, OLDMEM_SIZE - from);
+ from += OLDMEM_BASE;
+ } else {
+ len = count;
+ }
+ if (is_vmalloc_or_module_addr(dst)) {
+ ra = load_real_addr(dst);
+ len = min(PAGE_SIZE - offset_in_page(ra), len);
+ } else {
+ ra = dst;
+ }
+ if (memcpy_real(ra, (void *) from, len))
+ return -EFAULT;
+ }
+ dst += len;
+ src += len;
+ count -= len;
}
- return rc ? rc : csize;
+ return 0;
}
/*
- * Copy one page from kdump "oldmem"
- *
- * For the kdump reserved memory this functions performs a swap operation:
- * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE].
- * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ * Copy memory of the old, dumped system to a user space virtual address
*/
-static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize,
- unsigned long src, int userbuf)
-
+int copy_oldmem_user(void __user *dst, void *src, size_t count)
{
+ unsigned long from, len;
int rc;
- if (src < OLDMEM_SIZE)
- src += OLDMEM_BASE;
- else if (src > OLDMEM_BASE &&
- src < OLDMEM_BASE + OLDMEM_SIZE)
- src -= OLDMEM_BASE;
- if (userbuf)
- rc = copy_to_user_real((void __force __user *) buf,
- (void *) src, csize);
- else
- rc = copy_from_realmem(buf, (void *) src, csize);
- return (rc == 0) ? rc : csize;
+ while (count) {
+ from = __pa(src);
+ if (!OLDMEM_BASE && from < sclp.hsa_size) {
+ /* Copy from zfcpdump HSA area */
+ len = min(count, sclp.hsa_size - from);
+ rc = memcpy_hsa_user(dst, from, len);
+ if (rc)
+ return rc;
+ } else {
+ /* Check for swapped kdump oldmem areas */
+ if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) {
+ from -= OLDMEM_BASE;
+ len = min(count, OLDMEM_SIZE - from);
+ } else if (OLDMEM_BASE && from < OLDMEM_SIZE) {
+ len = min(count, OLDMEM_SIZE - from);
+ from += OLDMEM_BASE;
+ } else {
+ len = count;
+ }
+ rc = copy_to_user_real(dst, (void *) from, count);
+ if (rc)
+ return rc;
+ }
+ dst += len;
+ src += len;
+ count -= len;
+ }
+ return 0;
}
/*
@@ -132,15 +214,17 @@ static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize,
ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
unsigned long offset, int userbuf)
{
- unsigned long src;
+ void *src;
+ int rc;
if (!csize)
return 0;
- src = (pfn << PAGE_SHIFT) + offset;
- if (OLDMEM_BASE)
- return copy_oldmem_page_kdump(buf, csize, src, userbuf);
+ src = (void *) (pfn << PAGE_SHIFT) + offset;
+ if (userbuf)
+ rc = copy_oldmem_user((void __force __user *) buf, src, csize);
else
- return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf);
+ rc = copy_oldmem_kernel((void *) buf, src, csize);
+ return rc;
}
/*
@@ -209,33 +293,6 @@ int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
}
/*
- * Copy memory from old kernel
- */
-int copy_from_oldmem(void *dest, void *src, size_t count)
-{
- unsigned long copied = 0;
- int rc;
-
- if (OLDMEM_BASE) {
- if ((unsigned long) src < OLDMEM_SIZE) {
- copied = min(count, OLDMEM_SIZE - (unsigned long) src);
- rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied);
- if (rc)
- return rc;
- }
- } else {
- unsigned long hsa_end = sclp.hsa_size;
- if ((unsigned long) src < hsa_end) {
- copied = min(count, hsa_end - (unsigned long) src);
- rc = memcpy_hsa(dest, (unsigned long) src, copied, 0);
- if (rc)
- return rc;
- }
- }
- return copy_from_realmem(dest + copied, src + copied, count - copied);
-}
-
-/*
* Alloc memory and panic in case of ENOMEM
*/
static void *kzalloc_panic(int len)
@@ -251,8 +308,8 @@ static void *kzalloc_panic(int len)
/*
* Initialize ELF note
*/
-static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len,
- const char *name)
+static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len,
+ const char *name)
{
Elf64_Nhdr *note;
u64 len;
@@ -272,136 +329,42 @@ static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len,
return PTR_ADD(buf, len);
}
-/*
- * Initialize prstatus note
- */
-static void *nt_prstatus(void *ptr, struct save_area *sa)
+static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len)
{
- struct elf_prstatus nt_prstatus;
- static int cpu_nr = 1;
-
- memset(&nt_prstatus, 0, sizeof(nt_prstatus));
- memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs));
- memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw));
- memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs));
- nt_prstatus.pr_pid = cpu_nr;
- cpu_nr++;
-
- return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus),
- "CORE");
+ return nt_init_name(buf, type, desc, d_len, KEXEC_CORE_NOTE_NAME);
}
/*
- * Initialize fpregset (floating point) note
+ * Fill ELF notes for one CPU with save area registers
*/
-static void *nt_fpregset(void *ptr, struct save_area *sa)
+static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa)
{
+ struct elf_prstatus nt_prstatus;
elf_fpregset_t nt_fpregset;
+ /* Prepare prstatus note */
+ memset(&nt_prstatus, 0, sizeof(nt_prstatus));
+ memcpy(&nt_prstatus.pr_reg.gprs, sa->gprs, sizeof(sa->gprs));
+ memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw));
+ memcpy(&nt_prstatus.pr_reg.acrs, sa->acrs, sizeof(sa->acrs));
+ nt_prstatus.pr_pid = cpu;
+ /* Prepare fpregset (floating point) note */
memset(&nt_fpregset, 0, sizeof(nt_fpregset));
- memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg));
- memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs));
-
- return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset),
- "CORE");
-}
-
-/*
- * Initialize timer note
- */
-static void *nt_s390_timer(void *ptr, struct save_area *sa)
-{
- return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer),
- KEXEC_CORE_NOTE_NAME);
-}
-
-/*
- * Initialize TOD clock comparator note
- */
-static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa)
-{
- return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp,
- sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME);
-}
-
-/*
- * Initialize TOD programmable register note
- */
-static void *nt_s390_tod_preg(void *ptr, struct save_area *sa)
-{
- return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg,
- sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME);
-}
-
-/*
- * Initialize control register note
- */
-static void *nt_s390_ctrs(void *ptr, struct save_area *sa)
-{
- return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs,
- sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME);
-}
-
-/*
- * Initialize prefix register note
- */
-static void *nt_s390_prefix(void *ptr, struct save_area *sa)
-{
- return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg,
- sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME);
-}
-
-/*
- * Initialize vxrs high note (full 128 bit VX registers 16-31)
- */
-static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs)
-{
- return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16],
- 16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME);
-}
-
-/*
- * Initialize vxrs low note (lower halves of VX registers 0-15)
- */
-static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs)
-{
- Elf64_Nhdr *note;
- u64 len;
- int i;
-
- note = (Elf64_Nhdr *)ptr;
- note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1;
- note->n_descsz = 16 * 8;
- note->n_type = NT_S390_VXRS_LOW;
- len = sizeof(Elf64_Nhdr);
-
- memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz);
- len = roundup(len + note->n_namesz, 4);
-
- ptr += len;
- /* Copy lower halves of SIMD registers 0-15 */
- for (i = 0; i < 16; i++) {
- memcpy(ptr, &vx_regs[i].u[2], 8);
- ptr += 8;
- }
- return ptr;
-}
-
-/*
- * Fill ELF notes for one CPU with save area registers
- */
-void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vx_regs)
-{
- ptr = nt_prstatus(ptr, sa);
- ptr = nt_fpregset(ptr, sa);
- ptr = nt_s390_timer(ptr, sa);
- ptr = nt_s390_tod_cmp(ptr, sa);
- ptr = nt_s390_tod_preg(ptr, sa);
- ptr = nt_s390_ctrs(ptr, sa);
- ptr = nt_s390_prefix(ptr, sa);
- if (MACHINE_HAS_VX && vx_regs) {
- ptr = nt_s390_vx_low(ptr, vx_regs);
- ptr = nt_s390_vx_high(ptr, vx_regs);
+ memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc));
+ memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs));
+ /* Create ELF notes for the CPU */
+ ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus));
+ ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset));
+ ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer));
+ ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp));
+ ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg));
+ ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs));
+ ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix));
+ if (MACHINE_HAS_VX) {
+ ptr = nt_init(ptr, NT_S390_VXRS_HIGH,
+ &sa->vxrs_high, sizeof(sa->vxrs_high));
+ ptr = nt_init(ptr, NT_S390_VXRS_LOW,
+ &sa->vxrs_low, sizeof(sa->vxrs_low));
}
return ptr;
}
@@ -416,8 +379,7 @@ static void *nt_prpsinfo(void *ptr)
memset(&prpsinfo, 0, sizeof(prpsinfo));
prpsinfo.pr_sname = 'R';
strcpy(prpsinfo.pr_fname, "vmlinux");
- return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo),
- KEXEC_CORE_NOTE_NAME);
+ return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo));
}
/*
@@ -429,17 +391,18 @@ static void *get_vmcoreinfo_old(unsigned long *size)
Elf64_Nhdr note;
void *addr;
- if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr)))
+ if (copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr)))
return NULL;
memset(nt_name, 0, sizeof(nt_name));
- if (copy_from_oldmem(&note, addr, sizeof(note)))
+ if (copy_oldmem_kernel(&note, addr, sizeof(note)))
return NULL;
- if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1))
+ if (copy_oldmem_kernel(nt_name, addr + sizeof(note),
+ sizeof(nt_name) - 1))
return NULL;
if (strcmp(nt_name, "VMCOREINFO") != 0)
return NULL;
vmcoreinfo = kzalloc_panic(note.n_descsz);
- if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz))
+ if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz))
return NULL;
*size = note.n_descsz;
return vmcoreinfo;
@@ -458,7 +421,7 @@ static void *nt_vmcoreinfo(void *ptr)
vmcoreinfo = get_vmcoreinfo_old(&size);
if (!vmcoreinfo)
return ptr;
- return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO");
+ return nt_init_name(ptr, 0, vmcoreinfo, size, "VMCOREINFO");
}
/*
@@ -487,13 +450,12 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
*/
static int get_cpu_cnt(void)
{
- int i, cpus = 0;
+ struct save_area *sa;
+ int cpus = 0;
- for (i = 0; i < dump_save_areas.count; i++) {
- if (dump_save_areas.areas[i]->sa.pref_reg == 0)
- continue;
- cpus++;
- }
+ list_for_each_entry(sa, &dump_save_areas, list)
+ if (sa->prefix != 0)
+ cpus++;
return cpus;
}
@@ -538,18 +500,16 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
*/
static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
{
- struct save_area_ext *sa_ext;
+ struct save_area *sa;
void *ptr_start = ptr;
- int i;
+ int cpu;
ptr = nt_prpsinfo(ptr);
- for (i = 0; i < dump_save_areas.count; i++) {
- sa_ext = dump_save_areas.areas[i];
- if (sa_ext->sa.pref_reg == 0)
- continue;
- ptr = fill_cpu_elf_notes(ptr, &sa_ext->sa, sa_ext->vx_regs);
- }
+ cpu = 1;
+ list_for_each_entry(sa, &dump_save_areas, list)
+ if (sa->prefix != 0)
+ ptr = fill_cpu_elf_notes(ptr, cpu++, sa);
ptr = nt_vmcoreinfo(ptr);
memset(phdr, 0, sizeof(*phdr));
phdr->p_type = PT_NOTE;
@@ -573,9 +533,6 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
/* If we are not in kdump or zfcpdump mode return */
if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP)
return 0;
- /* If elfcorehdr= has been passed via cmdline, we use that one */
- if (elfcorehdr_addr != ELFCORE_ADDR_MAX)
- return 0;
/* If we cannot get HSA size for zfcpdump return error */
if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size)
return -ENODEV;
@@ -606,7 +563,6 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
hdr_off = PTR_DIFF(ptr, hdr);
loads_init(phdr_loads, hdr_off);
*addr = (unsigned long long) hdr;
- elfcorehdr_newmem = hdr;
*size = (unsigned long long) hdr_off;
BUG_ON(elfcorehdr_size > alloc_size);
return 0;
@@ -617,8 +573,6 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
*/
void elfcorehdr_free(unsigned long long addr)
{
- if (!elfcorehdr_newmem)
- return;
kfree((void *)(unsigned long)addr);
}
@@ -629,7 +583,6 @@ ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
{
void *src = (void *)(unsigned long)*ppos;
- src = elfcorehdr_newmem ? src : src - OLDMEM_BASE;
memcpy(buf, src, count);
*ppos += count;
return count;
@@ -641,15 +594,8 @@ ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
{
void *src = (void *)(unsigned long)*ppos;
- int rc;
- if (elfcorehdr_newmem) {
- memcpy(buf, src, count);
- } else {
- rc = copy_from_oldmem(buf, src, count);
- if (rc)
- return rc;
- }
+ memcpy(buf, src, count);
*ppos += count;
return count;
}
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 6fca0e46464e..aa12de72fd47 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -699,8 +699,7 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area,
/* Since debugfs currently does not support uid/gid other than root, */
/* we do not allow gid/uid != 0 until we get support for that. */
if ((uid != 0) || (gid != 0))
- pr_warning("Root becomes the owner of all s390dbf files "
- "in sysfs\n");
+ pr_warn("Root becomes the owner of all s390dbf files in sysfs\n");
BUG_ON(!initialized);
mutex_lock(&debug_mutex);
@@ -1307,8 +1306,7 @@ debug_input_level_fn(debug_info_t * id, struct debug_view *view,
new_level = debug_get_uint(str);
}
if(new_level < 0) {
- pr_warning("%s is not a valid level for a debug "
- "feature\n", str);
+ pr_warn("%s is not a valid level for a debug feature\n", str);
rc = -EINVAL;
} else {
debug_set_level(id, new_level);
@@ -1470,7 +1468,7 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
except_str = "*";
else
except_str = "-";
- caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
+ caller = (unsigned long) entry->caller;
rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p ",
area, (long long)time_spec.tv_sec,
time_spec.tv_nsec / 1000, level, except_str,
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index f98766ede4e1..48b37b8357e6 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -121,14 +121,14 @@ device_initcall(show_diag_stat_init);
void diag_stat_inc(enum diag_stat_enum nr)
{
this_cpu_inc(diag_stat.counter[nr]);
- trace_diagnose(diag_map[nr].code);
+ trace_s390_diagnose(diag_map[nr].code);
}
EXPORT_SYMBOL(diag_stat_inc);
void diag_stat_inc_norecursion(enum diag_stat_enum nr)
{
this_cpu_inc(diag_stat.counter[nr]);
- trace_diagnose_norecursion(diag_map[nr].code);
+ trace_s390_diagnose_norecursion(diag_map[nr].code);
}
EXPORT_SYMBOL(diag_stat_inc_norecursion);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 8140d10c6785..8cb9bfdd3ea8 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -2015,7 +2015,7 @@ void show_code(struct pt_regs *regs)
*ptr++ = '\t';
ptr += print_insn(ptr, code + start, addr);
start += opsize;
- printk(buffer);
+ printk("%s", buffer);
ptr = buffer;
ptr += sprintf(ptr, "\n ");
hops++;
@@ -2042,7 +2042,7 @@ void print_fn_code(unsigned char *code, unsigned long len)
ptr += print_insn(ptr, code, (unsigned long) code);
*ptr++ = '\n';
*ptr++ = 0;
- printk(buffer);
+ printk("%s", buffer);
code += opsize;
len -= opsize;
}
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index dc8e20473484..1b6081c0aff9 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -11,6 +11,7 @@
#include <linux/export.h>
#include <linux/kdebug.h>
#include <linux/ptrace.h>
+#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <asm/processor.h>
@@ -19,76 +20,87 @@
#include <asm/ipl.h>
/*
- * For show_trace we have tree different stack to consider:
+ * For dump_trace we have tree different stack to consider:
* - the panic stack which is used if the kernel stack has overflown
* - the asynchronous interrupt stack (cpu related)
* - the synchronous kernel stack (process related)
- * The stack trace can start at any of the three stack and can potentially
+ * The stack trace can start at any of the three stacks and can potentially
* touch all of them. The order is: panic stack, async stack, sync stack.
*/
static unsigned long
-__show_trace(unsigned long sp, unsigned long low, unsigned long high)
+__dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
+ unsigned long low, unsigned long high)
{
struct stack_frame *sf;
struct pt_regs *regs;
- unsigned long addr;
while (1) {
- sp = sp & PSW_ADDR_INSN;
if (sp < low || sp > high - sizeof(*sf))
return sp;
sf = (struct stack_frame *) sp;
- addr = sf->gprs[8] & PSW_ADDR_INSN;
- printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
/* Follow the backchain. */
while (1) {
+ if (func(data, sf->gprs[8]))
+ return sp;
low = sp;
- sp = sf->back_chain & PSW_ADDR_INSN;
+ sp = sf->back_chain;
if (!sp)
break;
if (sp <= low || sp > high - sizeof(*sf))
return sp;
sf = (struct stack_frame *) sp;
- addr = sf->gprs[8] & PSW_ADDR_INSN;
- printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
}
/* Zero backchain detected, check for interrupt frame. */
sp = (unsigned long) (sf + 1);
if (sp <= low || sp > high - sizeof(*regs))
return sp;
regs = (struct pt_regs *) sp;
- addr = regs->psw.addr & PSW_ADDR_INSN;
- printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
+ if (!user_mode(regs)) {
+ if (func(data, regs->psw.addr))
+ return sp;
+ }
low = sp;
sp = regs->gprs[15];
}
}
-static void show_trace(struct task_struct *task, unsigned long *stack)
+void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
+ unsigned long sp)
{
- const unsigned long frame_size =
- STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- register unsigned long __r15 asm ("15");
- unsigned long sp;
+ unsigned long frame_size;
- sp = (unsigned long) stack;
- if (!sp)
- sp = task ? task->thread.ksp : __r15;
- printk("Call Trace:\n");
+ frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
#ifdef CONFIG_CHECK_STACK
- sp = __show_trace(sp,
+ sp = __dump_trace(func, data, sp,
S390_lowcore.panic_stack + frame_size - 4096,
S390_lowcore.panic_stack + frame_size);
#endif
- sp = __show_trace(sp,
+ sp = __dump_trace(func, data, sp,
S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
S390_lowcore.async_stack + frame_size);
if (task)
- __show_trace(sp, (unsigned long) task_stack_page(task),
- (unsigned long) task_stack_page(task) + THREAD_SIZE);
+ __dump_trace(func, data, sp,
+ (unsigned long)task_stack_page(task),
+ (unsigned long)task_stack_page(task) + THREAD_SIZE);
else
- __show_trace(sp, S390_lowcore.thread_info,
+ __dump_trace(func, data, sp,
+ S390_lowcore.thread_info,
S390_lowcore.thread_info + THREAD_SIZE);
+}
+EXPORT_SYMBOL_GPL(dump_trace);
+
+static int show_address(void *data, unsigned long address)
+{
+ printk("([<%016lx>] %pSR)\n", address, (void *)address);
+ return 0;
+}
+
+static void show_trace(struct task_struct *task, unsigned long sp)
+{
+ if (!sp)
+ sp = task ? task->thread.ksp : current_stack_pointer();
+ printk("Call Trace:\n");
+ dump_trace(show_address, NULL, task, sp);
if (!task)
task = current;
debug_show_held_locks(task);
@@ -96,15 +108,16 @@ static void show_trace(struct task_struct *task, unsigned long *stack)
void show_stack(struct task_struct *task, unsigned long *sp)
{
- register unsigned long *__r15 asm ("15");
unsigned long *stack;
int i;
- if (!sp)
- stack = task ? (unsigned long *) task->thread.ksp : __r15;
- else
- stack = sp;
-
+ stack = sp;
+ if (!stack) {
+ if (!task)
+ stack = (unsigned long *)current_stack_pointer();
+ else
+ stack = (unsigned long *)task->thread.ksp;
+ }
for (i = 0; i < 20; i++) {
if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
break;
@@ -113,7 +126,7 @@ void show_stack(struct task_struct *task, unsigned long *sp)
printk("%016lx ", *stack++);
}
printk("\n");
- show_trace(task, sp);
+ show_trace(task, (unsigned long)sp);
}
static void show_last_breaking_event(struct pt_regs *regs)
@@ -122,13 +135,9 @@ static void show_last_breaking_event(struct pt_regs *regs)
printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]);
}
-static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
-{
- return (regs->psw.mask & bits) / ((~bits + 1) & bits);
-}
-
void show_registers(struct pt_regs *regs)
{
+ struct psw_bits *psw = &psw_bits(regs->psw);
char *mode;
mode = user_mode(regs) ? "User" : "Krnl";
@@ -137,13 +146,9 @@ void show_registers(struct pt_regs *regs)
printk(" (%pSR)", (void *)regs->psw.addr);
printk("\n");
printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
- "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
- mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
- mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY),
- mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT),
- mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
- mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
- printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA));
+ "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e,
+ psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm);
+ printk(" RI:%x EA:%x", psw->ri, psw->eaba);
printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
printk(" %016lx %016lx %016lx %016lx\n",
@@ -161,7 +166,7 @@ void show_regs(struct pt_regs *regs)
show_registers(regs);
/* Show stack backtrace if pt_regs is from kernel mode */
if (!user_mode(regs))
- show_trace(NULL, (unsigned long *) regs->gprs[15]);
+ show_trace(NULL, regs->gprs[15]);
show_last_breaking_event(regs);
}
@@ -185,9 +190,8 @@ void die(struct pt_regs *regs, const char *str)
#ifdef CONFIG_SMP
printk("SMP ");
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
+ if (debug_pagealloc_enabled())
+ printk("DEBUG_PAGEALLOC");
printk("\n");
notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
print_modules();
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 3c31609df959..a0684de5a93b 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -252,14 +252,14 @@ static void early_pgm_check_handler(void)
unsigned long addr;
addr = S390_lowcore.program_old_psw.addr;
- fixup = search_exception_tables(addr & PSW_ADDR_INSN);
+ fixup = search_exception_tables(addr);
if (!fixup)
disabled_wait(0);
/* Disable low address protection before storing into lowcore. */
__ctl_store(cr0, 0, 0);
cr0_new = cr0 & ~(1UL << 28);
__ctl_load(cr0_new, 0, 0);
- S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+ S390_lowcore.program_old_psw.addr = extable_fixup(fixup);
__ctl_load(cr0, 0, 0);
}
@@ -268,9 +268,9 @@ static noinline __init void setup_lowcore_early(void)
psw_t psw;
psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
- psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
+ psw.addr = (unsigned long) s390_base_ext_handler;
S390_lowcore.external_new_psw = psw;
- psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+ psw.addr = (unsigned long) s390_base_pgm_handler;
S390_lowcore.program_new_psw = psw;
s390_base_pgm_handler_fn = early_pgm_check_handler;
}
@@ -335,6 +335,14 @@ static __init void detect_machine_facilities(void)
}
}
+static inline void save_vector_registers(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (test_facility(129))
+ save_vx_regs(boot_cpu_vector_save_area);
+#endif
+}
+
static int __init disable_vector_extension(char *str)
{
S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
@@ -440,7 +448,6 @@ void __init startup_init(void)
rescue_initrd();
clear_bss_section();
init_kernel_storage_key();
- lockdep_init();
lockdep_off();
setup_lowcore_early();
setup_facility_list();
@@ -451,6 +458,7 @@ void __init startup_init(void)
detect_diag9c();
detect_diag44();
detect_machine_facilities();
+ save_vector_registers();
setup_topology();
sclp_early_detect();
lockdep_on();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 857b6526d298..2d47f9cfcb36 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -186,6 +186,7 @@ ENTRY(__switch_to)
stg %r5,__LC_THREAD_INFO # store thread info of next
stg %r15,__LC_KERNEL_STACK # store end of kernel stack
lg %r15,__THREAD_ksp(%r1) # load kernel stack of next
+ /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */
lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
@@ -764,6 +765,7 @@ ENTRY(psw_idle)
.insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15)
.Lpsw_idle_stcctm:
#endif
+ oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
STCK __CLOCK_IDLE_ENTER(%r2)
stpt __TIMER_IDLE_ENTER(%r2)
.Lpsw_idle_lpsw:
@@ -1146,6 +1148,7 @@ cleanup_critical:
.quad .Lio_done - 4
.Lcleanup_idle:
+ ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT
# copy interrupt clock & cpu timer
mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
@@ -1197,114 +1200,12 @@ cleanup_critical:
.quad .Lpsw_idle_lpsw
.Lcleanup_save_fpu_regs:
- TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- bor %r14
- clg %r9,BASED(.Lcleanup_save_fpu_regs_done)
- jhe 5f
- clg %r9,BASED(.Lcleanup_save_fpu_regs_fp)
- jhe 4f
- clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_high)
- jhe 3f
- clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_low)
- jhe 2f
- clg %r9,BASED(.Lcleanup_save_fpu_fpc_end)
- jhe 1f
- lg %r2,__LC_CURRENT
- aghi %r2,__TASK_thread
-0: # Store floating-point controls
- stfpc __THREAD_FPU_fpc(%r2)
-1: # Load register save area and check if VX is active
- lg %r3,__THREAD_FPU_regs(%r2)
- TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
- jz 4f # no VX -> store FP regs
-2: # Store vector registers (V0-V15)
- VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
-3: # Store vector registers (V16-V31)
- VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3)
- j 5f # -> done, set CIF_FPU flag
-4: # Store floating-point registers
- std 0,0(%r3)
- std 1,8(%r3)
- std 2,16(%r3)
- std 3,24(%r3)
- std 4,32(%r3)
- std 5,40(%r3)
- std 6,48(%r3)
- std 7,56(%r3)
- std 8,64(%r3)
- std 9,72(%r3)
- std 10,80(%r3)
- std 11,88(%r3)
- std 12,96(%r3)
- std 13,104(%r3)
- std 14,112(%r3)
- std 15,120(%r3)
-5: # Set CIF_FPU flag
- oi __LC_CPU_FLAGS+7,_CIF_FPU
- lg %r9,48(%r11) # return from save_fpu_regs
+ larl %r9,save_fpu_regs
br %r14
-.Lcleanup_save_fpu_fpc_end:
- .quad .Lsave_fpu_regs_fpc_end
-.Lcleanup_save_fpu_regs_vx_low:
- .quad .Lsave_fpu_regs_vx_low
-.Lcleanup_save_fpu_regs_vx_high:
- .quad .Lsave_fpu_regs_vx_high
-.Lcleanup_save_fpu_regs_fp:
- .quad .Lsave_fpu_regs_fp
-.Lcleanup_save_fpu_regs_done:
- .quad .Lsave_fpu_regs_done
.Lcleanup_load_fpu_regs:
- TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- bnor %r14
- clg %r9,BASED(.Lcleanup_load_fpu_regs_done)
- jhe 1f
- clg %r9,BASED(.Lcleanup_load_fpu_regs_fp)
- jhe 2f
- clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
- jhe 3f
- clg %r9,BASED(.Lcleanup_load_fpu_regs_vx)
- jhe 4f
- lg %r4,__LC_CURRENT
- aghi %r4,__TASK_thread
- lfpc __THREAD_FPU_fpc(%r4)
- TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
- lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
- jz 2f # -> no VX, load FP regs
-4: # Load V0 ..V15 registers
- VLM %v0,%v15,0,%r4
-3: # Load V16..V31 registers
- VLM %v16,%v31,256,%r4
- j 1f
-2: # Load floating-point registers
- ld 0,0(%r4)
- ld 1,8(%r4)
- ld 2,16(%r4)
- ld 3,24(%r4)
- ld 4,32(%r4)
- ld 5,40(%r4)
- ld 6,48(%r4)
- ld 7,56(%r4)
- ld 8,64(%r4)
- ld 9,72(%r4)
- ld 10,80(%r4)
- ld 11,88(%r4)
- ld 12,96(%r4)
- ld 13,104(%r4)
- ld 14,112(%r4)
- ld 15,120(%r4)
-1: # Clear CIF_FPU bit
- ni __LC_CPU_FLAGS+7,255-_CIF_FPU
- lg %r9,48(%r11) # return from load_fpu_regs
+ larl %r9,load_fpu_regs
br %r14
-.Lcleanup_load_fpu_regs_vx:
- .quad .Lload_fpu_regs_vx
-.Lcleanup_load_fpu_regs_vx_high:
- .quad .Lload_fpu_regs_vx_high
-.Lcleanup_load_fpu_regs_fp:
- .quad .Lload_fpu_regs_fp
-.Lcleanup_load_fpu_regs_done:
- .quad .Lload_fpu_regs_done
/*
* Integer constants
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index e0eaf11134b4..0f7bfeba6da6 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -203,7 +203,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
goto out;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
goto out;
- ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
+ ip -= MCOUNT_INSN_SIZE;
trace.func = ip;
trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to. */
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 1255c6c5353e..fcaefb041364 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -25,7 +25,9 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
+#include <asm/facility.h>
#include <asm/page.h>
+#include <asm/ptrace.h>
#define ARCH_OFFSET 4
@@ -59,19 +61,6 @@ __HEAD
.long 0x020006e0,0x20000050
.org 0x200
-#
-# subroutine to set architecture mode
-#
-.Lsetmode:
- mvi __LC_AR_MODE_ID,1 # set esame flag
- slr %r0,%r0 # set cpuid to zero
- lhi %r1,2 # mode 2 = esame (dump)
- sigp %r1,%r0,0x12 # switch to esame mode
- bras %r13,0f
- .fill 16,4,0x0
-0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
- sam31 # switch to 31 bit addressing mode
- br %r14
#
# subroutine to wait for end I/O
@@ -159,7 +148,14 @@ __HEAD
.long 0x02200050,0x00000000
iplstart:
- bas %r14,.Lsetmode # Immediately switch to 64 bit mode
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ bras %r13,0f
+ .fill 16,4,0x0
+0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
+ sam31 # switch to 31 bit addressing mode
lh %r1,0xb8 # test if subchannel number
bct %r1,.Lnoload # is valid
l %r1,0xb8 # load ipl subchannel number
@@ -269,71 +265,6 @@ iplstart:
.Lcpuid:.fill 8,1,0
#
-# SALIPL loader support. Based on a patch by Rob van der Heij.
-# This entry point is called directly from the SALIPL loader and
-# doesn't need a builtin ipl record.
-#
- .org 0x800
-ENTRY(start)
- stm %r0,%r15,0x07b0 # store registers
- bas %r14,.Lsetmode # Immediately switch to 64 bit mode
- basr %r12,%r0
-.base:
- l %r11,.parm
- l %r8,.cmd # pointer to command buffer
-
- ltr %r9,%r9 # do we have SALIPL parameters?
- bp .sk8x8
-
- mvc 0(64,%r8),0x00b0 # copy saved registers
- xc 64(240-64,%r8),0(%r8) # remainder of buffer
- tr 0(64,%r8),.lowcase
- b .gotr
-.sk8x8:
- mvc 0(240,%r8),0(%r9) # copy iplparms into buffer
-.gotr:
- slr %r0,%r0
- st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11)
- st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11)
- j startup # continue with startup
-.cmd: .long COMMAND_LINE # address of command line buffer
-.parm: .long PARMAREA
-.lowcase:
- .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
- .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
- .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17
- .byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
- .byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27
- .byte 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f
- .byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37
- .byte 0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f
- .byte 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47
- .byte 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f
- .byte 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57
- .byte 0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f
- .byte 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67
- .byte 0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f
- .byte 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77
- .byte 0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f
-
- .byte 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87
- .byte 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f
- .byte 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97
- .byte 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f
- .byte 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7
- .byte 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf
- .byte 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7
- .byte 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf
- .byte 0xc0,0x81,0x82,0x83,0x84,0x85,0x86,0x87 # .abcdefg
- .byte 0x88,0x89,0xca,0xcb,0xcc,0xcd,0xce,0xcf # hi
- .byte 0xd0,0x91,0x92,0x93,0x94,0x95,0x96,0x97 # .jklmnop
- .byte 0x98,0x99,0xda,0xdb,0xdc,0xdd,0xde,0xdf # qr
- .byte 0xe0,0xe1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 # ..stuvwx
- .byte 0xa8,0xa9,0xea,0xeb,0xec,0xed,0xee,0xef # yz
- .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7
- .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
-
-#
# startup-code at 0x10000, running in absolute addressing mode
# this is called either by the ipl loader or directly by PSW restart
# or linload or SALIPL
@@ -364,38 +295,38 @@ ENTRY(startup_kdump)
bras %r13,0f
.fill 16,4,0x0
0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
- sam31 # switch to 31 bit addressing mode
+ sam64 # switch to 64 bit addressing mode
basr %r13,0 # get base
.LPG0:
xc 0x200(256),0x200 # partially clear lowcore
xc 0x300(256),0x300
xc 0xe00(256),0xe00
+ xc 0xf00(256),0xf00
lctlg %c0,%c15,0x200(%r0) # initialize control registers
stck __LC_LAST_UPDATE_CLOCK
spt 6f-.LPG0(%r13)
mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
- xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST
- # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10}
- .insn s,0xb2b10000,0 # store facilities @ __LC_STFL_FAC_LIST
- tm __LC_STFL_FAC_LIST,0x01 # stfle available ?
+ stfl 0(%r0) # store facilities @ __LC_STFL_FAC_LIST
+ mvc __LC_STFLE_FAC_LIST(4),__LC_STFL_FAC_LIST
+ tm __LC_STFLE_FAC_LIST,0x01 # stfle available ?
jz 0f
- la %r0,1
- .insn s,0xb2b00000,__LC_STFL_FAC_LIST # store facility list extended
+ lghi %r0,FACILITIES_ALS_DWORDS-1
+ .insn s,0xb2b00000,__LC_STFLE_FAC_LIST # store facility list extended
# verify if all required facilities are supported by the machine
-0: la %r1,__LC_STFL_FAC_LIST
+0: la %r1,__LC_STFLE_FAC_LIST
la %r2,3f+8-.LPG0(%r13)
- l %r3,0(%r2)
-1: l %r0,0(%r1)
- n %r0,4(%r2)
- cl %r0,4(%r2)
+ lhi %r3,FACILITIES_ALS_DWORDS
+1: lg %r0,0(%r1)
+ ng %r0,0(%r2)
+ clg %r0,0(%r2)
jne 2f
- la %r1,4(%r1)
- la %r2,4(%r2)
+ la %r1,8(%r1)
+ la %r2,8(%r2)
ahi %r3,-1
jnz 1b
j 4f
2: l %r15,.Lstack-.LPG0(%r13)
- ahi %r15,-96
+ ahi %r15,-STACK_FRAME_OVERHEAD
la %r2,.Lals_string-.LPG0(%r13)
l %r3,.Lsclp_print-.LPG0(%r13)
basr %r14,%r3
@@ -410,27 +341,12 @@ ENTRY(startup_kdump)
3: .long 0x000a0000,0x8badcccc
# List of facilities that are required. If not all facilities are present
-# the kernel will crash. Format is number of facility words with bits set,
-# followed by the facility words.
+# the kernel will crash.
+
+ .quad FACILITIES_ALS
-#if defined(CONFIG_MARCH_Z13)
- .long 2, 0xc100eff2, 0xf46cc800
-#elif defined(CONFIG_MARCH_ZEC12)
- .long 2, 0xc100eff2, 0xf46cc800
-#elif defined(CONFIG_MARCH_Z196)
- .long 2, 0xc100eff2, 0xf46c0000
-#elif defined(CONFIG_MARCH_Z10)
- .long 2, 0xc100eff2, 0xf0680000
-#elif defined(CONFIG_MARCH_Z9_109)
- .long 1, 0xc100efc2
-#elif defined(CONFIG_MARCH_Z990)
- .long 1, 0xc0002000
-#elif defined(CONFIG_MARCH_Z900)
- .long 1, 0xc0000000
-#endif
4:
- /* Continue with 64bit startup code in head64.S */
- sam64 # switch to 64 bit mode
+ /* Continue with startup code in head64.S */
jg startup_continue
.align 8
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 58b719fa8067..03c2b469c472 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -16,7 +16,7 @@
__HEAD
ENTRY(startup_continue)
- tm __LC_STFL_FAC_LIST+6,0x80 # LPP available ?
+ tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ?
jz 0f
xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid
mvi __LC_LPP,0x80 # and set LPP_MAGIC
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index f6d8acd7e136..f20abdb5630a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -121,6 +121,7 @@ static char *dump_type_str(enum dump_type type)
* Must be in data section since the bss section
* is not cleared when these are accessed.
*/
+static u8 ipl_ssid __attribute__((__section__(".data"))) = 0;
static u16 ipl_devno __attribute__((__section__(".data"))) = 0;
u32 ipl_flags __attribute__((__section__(".data"))) = 0;
@@ -197,6 +198,33 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \
return snprintf(page, PAGE_SIZE, _format, ##args); \
}
+#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ unsigned long long ssid, devno; \
+ \
+ if (sscanf(buf, "0.%llx.%llx\n", &ssid, &devno) != 2) \
+ return -EINVAL; \
+ \
+ if (ssid > __MAX_SSID || devno > __MAX_SUBCHANNEL) \
+ return -EINVAL; \
+ \
+ _ipl_blk.ssid = ssid; \
+ _ipl_blk.devno = devno; \
+ return len; \
+}
+
+#define DEFINE_IPL_CCW_ATTR_RW(_prefix, _name, _ipl_blk) \
+IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n", \
+ _ipl_blk.ssid, _ipl_blk.devno); \
+IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk); \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name, (S_IRUGO | S_IWUSR), \
+ sys_##_prefix##_##_name##_show, \
+ sys_##_prefix##_##_name##_store) \
+
#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \
IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value) \
static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
@@ -395,7 +423,7 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- return sprintf(page, "0.0.%04x\n", ipl_devno);
+ return sprintf(page, "0.%x.%04x\n", ipl_ssid, ipl_devno);
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno);
@@ -687,21 +715,14 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr,
char *buf, loff_t off, size_t count)
{
+ size_t scpdata_len = count;
size_t padding;
- size_t scpdata_len;
- if (off < 0)
- return -EINVAL;
-
- if (off >= DIAG308_SCPDATA_SIZE)
- return -ENOSPC;
-
- if (count > DIAG308_SCPDATA_SIZE - off)
- count = DIAG308_SCPDATA_SIZE - off;
- memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count);
- scpdata_len = off + count;
+ if (off)
+ return -EINVAL;
+ memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf, count);
if (scpdata_len % 8) {
padding = 8 - (scpdata_len % 8);
memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
@@ -717,7 +738,7 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
}
static struct bin_attribute sys_reipl_fcp_scp_data_attr =
__BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_fcp_scpdata_read,
- reipl_fcp_scpdata_write, PAGE_SIZE);
+ reipl_fcp_scpdata_write, DIAG308_SCPDATA_SIZE);
static struct bin_attribute *reipl_fcp_bin_attrs[] = {
&sys_reipl_fcp_scp_data_attr,
@@ -814,9 +835,7 @@ static struct attribute_group reipl_fcp_attr_group = {
};
/* CCW reipl device attributes */
-
-DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
- reipl_block_ccw->ipl_info.ccw.devno);
+DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ipl_info.ccw);
/* NSS wrapper */
static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
@@ -1056,8 +1075,8 @@ static void __reipl_run(void *unused)
switch (reipl_method) {
case REIPL_METHOD_CCW_CIO:
+ devid.ssid = reipl_block_ccw->ipl_info.ccw.ssid;
devid.devno = reipl_block_ccw->ipl_info.ccw.devno;
- devid.ssid = 0;
reipl_ccw_dev(&devid);
break;
case REIPL_METHOD_CCW_VM:
@@ -1192,6 +1211,7 @@ static int __init reipl_ccw_init(void)
reipl_block_ccw_init(reipl_block_ccw);
if (ipl_info.type == IPL_TYPE_CCW) {
+ reipl_block_ccw->ipl_info.ccw.ssid = ipl_ssid;
reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
reipl_block_ccw_fill_parms(reipl_block_ccw);
}
@@ -1336,9 +1356,7 @@ static struct attribute_group dump_fcp_attr_group = {
};
/* CCW dump device attributes */
-
-DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
- dump_block_ccw->ipl_info.ccw.devno);
+DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ipl_info.ccw);
static struct attribute *dump_ccw_attrs[] = {
&sys_dump_ccw_device_attr.attr,
@@ -1418,8 +1436,8 @@ static void __dump_run(void *unused)
switch (dump_method) {
case DUMP_METHOD_CCW_CIO:
+ devid.ssid = dump_block_ccw->ipl_info.ccw.ssid;
devid.devno = dump_block_ccw->ipl_info.ccw.devno;
- devid.ssid = 0;
reipl_ccw_dev(&devid);
break;
case DUMP_METHOD_CCW_VM:
@@ -1939,14 +1957,14 @@ void __init setup_ipl(void)
ipl_info.type = get_ipl_type();
switch (ipl_info.type) {
case IPL_TYPE_CCW:
+ ipl_info.data.ccw.dev_id.ssid = ipl_ssid;
ipl_info.data.ccw.dev_id.devno = ipl_devno;
- ipl_info.data.ccw.dev_id.ssid = 0;
break;
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
+ ipl_info.data.fcp.dev_id.ssid = 0;
ipl_info.data.fcp.dev_id.devno =
IPL_PARMBLOCK_START->ipl_info.fcp.devno;
- ipl_info.data.fcp.dev_id.ssid = 0;
ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn;
ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun;
break;
@@ -1978,6 +1996,7 @@ void __init ipl_save_parameters(void)
if (cio_get_iplinfo(&iplinfo))
return;
+ ipl_ssid = iplinfo.ssid;
ipl_devno = iplinfo.devno;
ipl_flags |= IPL_DEVNO_VALID;
if (!iplinfo.is_qdio)
@@ -2020,21 +2039,15 @@ static void do_reset_calls(void)
reset->fn();
}
-u32 dump_prefix_page;
-
-void s390_reset_system(void (*fn_pre)(void),
- void (*fn_post)(void *), void *data)
+void s390_reset_system(void)
{
- struct _lowcore *lc;
+ struct lowcore *lc;
- lc = (struct _lowcore *)(unsigned long) store_prefix();
+ lc = (struct lowcore *)(unsigned long) store_prefix();
/* Stack for interrupt/machine check handler */
lc->panic_stack = S390_lowcore.panic_stack;
- /* Save prefix page address for dump case */
- dump_prefix_page = (u32)(unsigned long) lc;
-
/* Disable prefixing */
set_prefix(0);
@@ -2044,12 +2057,12 @@ void s390_reset_system(void (*fn_pre)(void),
/* Set new machine check handler */
S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
S390_lowcore.mcck_new_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
+ (unsigned long) s390_base_mcck_handler;
/* Set new program check handler */
S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
S390_lowcore.program_new_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+ (unsigned long) s390_base_pgm_handler;
/*
* Clear subchannel ID and number to signal new kernel that no CCW or
@@ -2058,14 +2071,5 @@ void s390_reset_system(void (*fn_pre)(void),
S390_lowcore.subchannel_id = 0;
S390_lowcore.subchannel_nr = 0;
- /* Store status at absolute zero */
- store_status();
-
- /* Call function before reset */
- if (fn_pre)
- fn_pre();
do_reset_calls();
- /* Call function after reset */
- if (fn_post)
- fn_post(data);
}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index f41d5208aaf7..c373a1d41d10 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -164,8 +164,7 @@ void do_softirq_own_stack(void)
{
unsigned long old, new;
- /* Get current stack pointer. */
- asm volatile("la %0,0(15)" : "=a" (old));
+ old = current_stack_pointer();
/* Check against async. stack address range. */
new = S390_lowcore.async_stack;
if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 389db56a2208..250f5972536a 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -226,7 +226,7 @@ static void enable_singlestep(struct kprobe_ctlblk *kcb,
__ctl_load(per_kprobe, 9, 11);
regs->psw.mask |= PSW_MASK_PER;
regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
- regs->psw.addr = ip | PSW_ADDR_AMODE;
+ regs->psw.addr = ip;
}
NOKPROBE_SYMBOL(enable_singlestep);
@@ -238,7 +238,7 @@ static void disable_singlestep(struct kprobe_ctlblk *kcb,
__ctl_load(kcb->kprobe_saved_ctl, 9, 11);
regs->psw.mask &= ~PSW_MASK_PER;
regs->psw.mask |= kcb->kprobe_saved_imask;
- regs->psw.addr = ip | PSW_ADDR_AMODE;
+ regs->psw.addr = ip;
}
NOKPROBE_SYMBOL(disable_singlestep);
@@ -310,7 +310,7 @@ static int kprobe_handler(struct pt_regs *regs)
*/
preempt_disable();
kcb = get_kprobe_ctlblk();
- p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
+ p = get_kprobe((void *)(regs->psw.addr - 2));
if (p) {
if (kprobe_running()) {
@@ -460,7 +460,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
break;
}
- regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
+ regs->psw.addr = orig_ret_address;
pop_kprobe(get_kprobe_ctlblk());
kretprobe_hash_unlock(current, &flags);
@@ -490,7 +490,7 @@ NOKPROBE_SYMBOL(trampoline_probe_handler);
static void resume_execution(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
+ unsigned long ip = regs->psw.addr;
int fixup = probe_get_fixup_type(p->ainsn.insn);
/* Check if the kprobes location is an enabled ftrace caller */
@@ -605,9 +605,9 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
- entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+ entry = search_exception_tables(regs->psw.addr);
if (entry) {
- regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
+ regs->psw.addr = extable_fixup(entry);
return 1;
}
@@ -683,7 +683,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
/* setup return addr to the jprobe handler routine */
- regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
+ regs->psw.addr = (unsigned long) jp->entry;
regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
/* r15 is the stack pointer */
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index fb0901ec4306..2f1b7217c25c 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -35,46 +35,6 @@ extern const unsigned long long relocate_kernel_len;
#ifdef CONFIG_CRASH_DUMP
/*
- * Create ELF notes for one CPU
- */
-static void add_elf_notes(int cpu)
-{
- struct save_area *sa = (void *) 4608 + store_prefix();
- void *ptr;
-
- memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa));
- ptr = (u64 *) per_cpu_ptr(crash_notes, cpu);
- ptr = fill_cpu_elf_notes(ptr, sa, NULL);
- memset(ptr, 0, sizeof(struct elf_note));
-}
-
-/*
- * Initialize CPU ELF notes
- */
-static void setup_regs(void)
-{
- unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE;
- struct _lowcore *lc;
- int cpu, this_cpu;
-
- /* Get lowcore pointer from store status of this CPU (absolute zero) */
- lc = (struct _lowcore *)(unsigned long)S390_lowcore.prefixreg_save_area;
- this_cpu = smp_find_processor_id(stap());
- add_elf_notes(this_cpu);
- for_each_online_cpu(cpu) {
- if (cpu == this_cpu)
- continue;
- if (smp_store_status(cpu))
- continue;
- add_elf_notes(cpu);
- }
- if (MACHINE_HAS_VX)
- save_vx_regs_safe((void *) lc->vector_save_area_addr);
- /* Copy dump CPU store status info to absolute zero */
- memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area));
-}
-
-/*
* PM notifier callback for kdump
*/
static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
@@ -105,14 +65,66 @@ static int __init machine_kdump_pm_init(void)
arch_initcall(machine_kdump_pm_init);
/*
- * Start kdump: We expect here that a store status has been done on our CPU
+ * Reset the system, copy boot CPU registers to absolute zero,
+ * and jump to the kdump image
*/
static void __do_machine_kdump(void *image)
{
- int (*start_kdump)(int) = (void *)((struct kimage *) image)->start;
+ int (*start_kdump)(int);
+ unsigned long prefix;
+
+ /* store_status() saved the prefix register to lowcore */
+ prefix = (unsigned long) S390_lowcore.prefixreg_save_area;
+
+ /* Now do the reset */
+ s390_reset_system();
+
+ /*
+ * Copy dump CPU store status info to absolute zero.
+ * This need to be done *after* s390_reset_system set the
+ * prefix register of this CPU to zero
+ */
+ memcpy((void *) __LC_FPREGS_SAVE_AREA,
+ (void *)(prefix + __LC_FPREGS_SAVE_AREA), 512);
__load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
+ start_kdump = (void *)((struct kimage *) image)->start;
start_kdump(1);
+
+ /* Die if start_kdump returns */
+ disabled_wait((unsigned long) __builtin_return_address(0));
+}
+
+/*
+ * Start kdump: create a LGR log entry, store status of all CPUs and
+ * branch to __do_machine_kdump.
+ */
+static noinline void __machine_kdump(void *image)
+{
+ int this_cpu, cpu;
+
+ lgr_info_log();
+ /* Get status of the other CPUs */
+ this_cpu = smp_find_processor_id(stap());
+ for_each_online_cpu(cpu) {
+ if (cpu == this_cpu)
+ continue;
+ if (smp_store_status(cpu))
+ continue;
+ }
+ /* Store status of the boot CPU */
+ if (MACHINE_HAS_VX)
+ save_vx_regs((void *) &S390_lowcore.vector_save_area);
+ /*
+ * To create a good backchain for this CPU in the dump store_status
+ * is passed the address of a function. The address is saved into
+ * the PSW save area of the boot CPU and the function is invoked as
+ * a tail call of store_status. The backchain in the dump will look
+ * like this:
+ * restart_int_handler -> __machine_kexec -> __do_machine_kdump
+ * The call to store_status() will not return.
+ */
+ store_status(__do_machine_kdump, image);
}
#endif
@@ -235,10 +247,14 @@ static void __do_machine_kexec(void *data)
relocate_kernel_t data_mover;
struct kimage *image = data;
+ s390_reset_system();
data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
/* Call the moving routine */
(*data_mover)(&image->head, image->start);
+
+ /* Die if kexec returns */
+ disabled_wait((unsigned long) __builtin_return_address(0));
}
/*
@@ -251,14 +267,10 @@ static void __machine_kexec(void *data)
tracing_off();
debug_locks_off();
#ifdef CONFIG_CRASH_DUMP
- if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH) {
-
- lgr_info_log();
- s390_reset_system(setup_regs, __do_machine_kdump, data);
- } else
+ if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH)
+ __machine_kdump(data);
#endif
- s390_reset_system(NULL, __do_machine_kexec, data);
- disabled_wait((unsigned long) __builtin_return_address(0));
+ __do_machine_kexec(data);
}
/*
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 0c1a679314dd..7873e171457c 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -159,11 +159,11 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
/* Increase core size by size of got & plt and set start
offsets for got and plt. */
- me->core_size = ALIGN(me->core_size, 4);
- me->arch.got_offset = me->core_size;
- me->core_size += me->arch.got_size;
- me->arch.plt_offset = me->core_size;
- me->core_size += me->arch.plt_size;
+ me->core_layout.size = ALIGN(me->core_layout.size, 4);
+ me->arch.got_offset = me->core_layout.size;
+ me->core_layout.size += me->arch.got_size;
+ me->arch.plt_offset = me->core_layout.size;
+ me->core_layout.size += me->arch.plt_size;
return 0;
}
@@ -279,7 +279,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
if (info->got_initialized == 0) {
Elf_Addr *gotent;
- gotent = me->module_core + me->arch.got_offset +
+ gotent = me->core_layout.base + me->arch.got_offset +
info->got_offset;
*gotent = val;
info->got_initialized = 1;
@@ -302,7 +302,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
rc = apply_rela_bits(loc, val, 0, 64, 0);
else if (r_type == R_390_GOTENT ||
r_type == R_390_GOTPLTENT) {
- val += (Elf_Addr) me->module_core - loc;
+ val += (Elf_Addr) me->core_layout.base - loc;
rc = apply_rela_bits(loc, val, 1, 32, 1);
}
break;
@@ -315,7 +315,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
if (info->plt_initialized == 0) {
unsigned int *ip;
- ip = me->module_core + me->arch.plt_offset +
+ ip = me->core_layout.base + me->arch.plt_offset +
info->plt_offset;
ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
ip[1] = 0x100a0004;
@@ -334,7 +334,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val - loc + 0xffffUL < 0x1ffffeUL) ||
(r_type == R_390_PLT32DBL &&
val - loc + 0xffffffffULL < 0x1fffffffeULL)))
- val = (Elf_Addr) me->module_core +
+ val = (Elf_Addr) me->core_layout.base +
me->arch.plt_offset +
info->plt_offset;
val += rela->r_addend - loc;
@@ -356,7 +356,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_GOTOFF32: /* 32 bit offset to GOT. */
case R_390_GOTOFF64: /* 64 bit offset to GOT. */
val = val + rela->r_addend -
- ((Elf_Addr) me->module_core + me->arch.got_offset);
+ ((Elf_Addr) me->core_layout.base + me->arch.got_offset);
if (r_type == R_390_GOTOFF16)
rc = apply_rela_bits(loc, val, 0, 16, 0);
else if (r_type == R_390_GOTOFF32)
@@ -366,7 +366,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
break;
case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */
case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */
- val = (Elf_Addr) me->module_core + me->arch.got_offset +
+ val = (Elf_Addr) me->core_layout.base + me->arch.got_offset +
rela->r_addend - loc;
if (r_type == R_390_GOTPC)
rc = apply_rela_bits(loc, val, 1, 32, 0);
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index d112fc66f993..87f05e475ae8 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -89,7 +89,7 @@ static void os_info_old_alloc(int nr, int align)
goto fail;
}
buf_align = PTR_ALIGN(buf, align);
- if (copy_from_oldmem(buf_align, (void *) addr, size)) {
+ if (copy_oldmem_kernel(buf_align, (void *) addr, size)) {
msg = "copy failed";
goto fail_free;
}
@@ -122,14 +122,15 @@ static void os_info_old_init(void)
return;
if (!OLDMEM_BASE)
goto fail;
- if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr)))
+ if (copy_oldmem_kernel(&addr, &S390_lowcore.os_info, sizeof(addr)))
goto fail;
if (addr == 0 || addr % PAGE_SIZE)
goto fail;
os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL);
if (!os_info_old)
goto fail;
- if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old)))
+ if (copy_oldmem_kernel(os_info_old, (void *) addr,
+ sizeof(*os_info_old)))
goto fail_free;
if (os_info_old->magic != OS_INFO_MAGIC)
goto fail_free;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 929c147e07b4..62f066b5259e 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -383,7 +383,7 @@ static int __hw_perf_event_init(struct perf_event *event)
/* Validate the counter that is assigned to this event.
* Because the counter facility can use numerous counters at the
- * same time without constraints, it is not necessary to explicity
+ * same time without constraints, it is not necessary to explicitly
* validate event groups (event->group_leader != event).
*/
err = validate_event(hwc);
@@ -670,6 +670,7 @@ static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action,
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
+ case CPU_DOWN_FAILED:
flags = PMC_INIT;
smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
break;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 3d8da1e742c2..eaab9a7cb3be 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1022,10 +1022,13 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
/*
* A non-zero guest program parameter indicates a guest
* sample.
- * Note that some early samples might be misaccounted to
- * the host.
+ * Note that some early samples or samples from guests without
+ * lpp usage would be misaccounted to the host. We use the asn
+ * value as a heuristic to detect most of these guest samples.
+ * If the value differs from the host hpp value, we assume
+ * it to be a KVM guest.
*/
- if (sfr->basic.gpp)
+ if (sfr->basic.gpp || sfr->basic.prim_asn != (u16) sfr->basic.hpp)
sde_regs->in_guest = 1;
overflow = 0;
@@ -1518,7 +1521,7 @@ static int cpumf_pmu_notifier(struct notifier_block *self,
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
+ case CPU_DOWN_FAILED:
flags = PMC_INIT;
smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
break;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 61595c1f0a0f..c3e4099b60a5 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -74,7 +74,7 @@ static unsigned long guest_is_user_mode(struct pt_regs *regs)
static unsigned long instruction_pointer_guest(struct pt_regs *regs)
{
- return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+ return sie_block(regs)->gpsw.addr;
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
@@ -222,67 +222,23 @@ static int __init service_level_perf_register(void)
}
arch_initcall(service_level_perf_register);
-/* See also arch/s390/kernel/traps.c */
-static unsigned long __store_trace(struct perf_callchain_entry *entry,
- unsigned long sp,
- unsigned long low, unsigned long high)
+static int __perf_callchain_kernel(void *data, unsigned long address)
{
- struct stack_frame *sf;
- struct pt_regs *regs;
-
- while (1) {
- sp = sp & PSW_ADDR_INSN;
- if (sp < low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
- /* Follow the backchain. */
- while (1) {
- low = sp;
- sp = sf->back_chain & PSW_ADDR_INSN;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- perf_callchain_store(entry,
- sf->gprs[8] & PSW_ADDR_INSN);
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long) (sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *) sp;
- perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
- low = sp;
- sp = regs->gprs[15];
- }
+ struct perf_callchain_entry *entry = data;
+
+ perf_callchain_store(entry, address);
+ return 0;
}
void perf_callchain_kernel(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
- unsigned long head;
- struct stack_frame *head_sf;
-
if (user_mode(regs))
return;
-
- head = regs->gprs[15];
- head_sf = (struct stack_frame *) head;
-
- if (!head_sf || !head_sf->back_chain)
- return;
-
- head = head_sf->back_chain;
- head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE,
- S390_lowcore.async_stack);
-
- __store_trace(entry, head, S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE);
+ dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]);
}
-/* Perf defintions for PMU event attributes in sysfs */
+/* Perf definitions for PMU event attributes in sysfs */
ssize_t cpumf_events_sysfs_show(struct device *dev,
struct device_attribute *attr, char *page)
{
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 688a3aad9c79..2bba7df4ac51 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -56,10 +56,10 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return 0;
low = task_stack_page(tsk);
high = (struct stack_frame *) task_pt_regs(tsk);
- sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN);
+ sf = (struct stack_frame *) tsk->thread.ksp;
if (sf <= low || sf > high)
return 0;
- sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+ sf = (struct stack_frame *) sf->back_chain;
if (sf <= low || sf > high)
return 0;
return sf->gprs[8];
@@ -154,7 +154,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
memset(&frame->childregs, 0, sizeof(struct pt_regs));
frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
- frame->childregs.psw.addr = PSW_ADDR_AMODE |
+ frame->childregs.psw.addr =
(unsigned long) kernel_thread_starter;
frame->childregs.gprs[9] = new_stackp; /* function */
frame->childregs.gprs[10] = arg;
@@ -220,14 +220,14 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
low = task_stack_page(p);
high = (struct stack_frame *) task_pt_regs(p);
- sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN);
+ sf = (struct stack_frame *) p->thread.ksp;
if (sf <= low || sf > high)
return 0;
for (count = 0; count < 16; count++) {
- sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+ sf = (struct stack_frame *) sf->back_chain;
if (sf <= low || sf > high)
return 0;
- return_address = sf->gprs[8] & PSW_ADDR_INSN;
+ return_address = sf->gprs[8];
if (!in_sched_functions(return_address))
return return_address;
}
@@ -243,11 +243,7 @@ unsigned long arch_align_stack(unsigned long sp)
static inline unsigned long brk_rnd(void)
{
- /* 8MB for 32bit, 1GB for 64bit */
- if (is_32bit_task())
- return (get_random_int() & 0x7ffUL) << PAGE_SHIFT;
- else
- return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT;
+ return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
}
unsigned long arch_randomize_brk(struct mm_struct *mm)
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 7ce00e7a709a..647128d5b983 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -61,6 +61,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
"edat", "etf3eh", "highgprs", "te", "vx"
};
+ static const char * const int_hwcap_str[] = {
+ "sie"
+ };
unsigned long n = (unsigned long) v - 1;
int i;
@@ -75,6 +78,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
seq_printf(m, "%s ", hwcap_str[i]);
+ for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++)
+ if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
+ seq_printf(m, "%s ", int_hwcap_str[i]);
seq_puts(m, "\n");
show_cacheinfo(m);
}
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 01c37b36caf9..49b1c13bf6c9 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -84,7 +84,7 @@ void update_cr_regs(struct task_struct *task)
if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
new.control |= PER_EVENT_IFETCH;
new.start = 0;
- new.end = PSW_ADDR_INSN;
+ new.end = -1UL;
}
/* Take care of the PER enablement bit in the PSW. */
@@ -148,7 +148,7 @@ static inline unsigned long __peek_user_per(struct task_struct *child,
else if (addr == (addr_t) &dummy->cr11)
/* End address of the active per set. */
return test_thread_flag(TIF_SINGLE_STEP) ?
- PSW_ADDR_INSN : child->thread.per_user.end;
+ -1UL : child->thread.per_user.end;
else if (addr == (addr_t) &dummy->bits)
/* Single-step bit. */
return test_thread_flag(TIF_SINGLE_STEP) ?
@@ -495,8 +495,6 @@ long arch_ptrace(struct task_struct *child, long request,
}
return 0;
default:
- /* Removing high order bit from addr (only for 31 bit). */
- addr &= PSW_ADDR_INSN;
return ptrace_request(child, request, addr, data);
}
}
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 52aab0bd84f8..89ea8c213d82 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -9,60 +9,66 @@
#include <asm/sigp.h>
#
-# store_status
+# Issue "store status" for the current CPU to its prefix page
+# and call passed function afterwards
#
-# Prerequisites to run this function:
-# - Prefix register is set to zero
-# - Original prefix register is stored in "dump_prefix_page"
-# - Lowcore protection is off
+# r2 = Function to be called after store status
+# r3 = Parameter for function
#
ENTRY(store_status)
/* Save register one and load save area base */
stg %r1,__LC_SAVE_AREA_RESTART
- lghi %r1,SAVE_AREA_BASE
/* General purpose registers */
- stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- lg %r2,__LC_SAVE_AREA_RESTART
- stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1)
+ lghi %r1,__LC_GPREGS_SAVE_AREA
+ stmg %r0,%r15,0(%r1)
+ mvc 8(8,%r1),__LC_SAVE_AREA_RESTART
/* Control registers */
- stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ lghi %r1,__LC_CREGS_SAVE_AREA
+ stctg %c0,%c15,0(%r1)
/* Access registers */
- stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ lghi %r1,__LC_AREGS_SAVE_AREA
+ stam %a0,%a15,0(%r1)
/* Floating point registers */
- std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ lghi %r1,__LC_FPREGS_SAVE_AREA
+ std %f0, 0x00(%r1)
+ std %f1, 0x08(%r1)
+ std %f2, 0x10(%r1)
+ std %f3, 0x18(%r1)
+ std %f4, 0x20(%r1)
+ std %f5, 0x28(%r1)
+ std %f6, 0x30(%r1)
+ std %f7, 0x38(%r1)
+ std %f8, 0x40(%r1)
+ std %f9, 0x48(%r1)
+ std %f10,0x50(%r1)
+ std %f11,0x58(%r1)
+ std %f12,0x60(%r1)
+ std %f13,0x68(%r1)
+ std %f14,0x70(%r1)
+ std %f15,0x78(%r1)
/* Floating point control register */
- stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ lghi %r1,__LC_FP_CREG_SAVE_AREA
+ stfpc 0(%r1)
/* CPU timer */
- stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1)
- /* Saved prefix register */
- larl %r2,dump_prefix_page
- mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2)
+ lghi %r1,__LC_CPU_TIMER_SAVE_AREA
+ stpt 0(%r1)
+ /* Store prefix register */
+ lghi %r1,__LC_PREFIX_SAVE_AREA
+ stpx 0(%r1)
/* Clock comparator - seven bytes */
- larl %r2,.Lclkcmp
- stckc 0(%r2)
- mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2)
+ lghi %r1,__LC_CLOCK_COMP_SAVE_AREA
+ larl %r4,.Lclkcmp
+ stckc 0(%r4)
+ mvc 1(7,%r1),1(%r4)
/* Program status word */
- epsw %r2,%r3
- st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1)
- st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1)
- larl %r2,store_status
- stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1)
- br %r14
+ lghi %r1,__LC_PSW_SAVE_AREA
+ epsw %r4,%r5
+ st %r4,0(%r1)
+ st %r5,4(%r1)
+ stg %r2,8(%r1)
+ lgr %r1,%r2
+ lgr %r2,%r3
+ br %r1
.section .bss
.align 8
@@ -77,9 +83,11 @@ ENTRY(store_status)
ENTRY(do_reipl_asm)
basr %r13,0
.Lpg0: lpswe .Lnewpsw-.Lpg0(%r13)
-.Lpg1: brasl %r14,store_status
+.Lpg1: lgr %r3,%r2
+ larl %r2,.Lstatus
+ brasl %r14,store_status
- lctlg %c6,%c6,.Lall-.Lpg0(%r13)
+.Lstatus: lctlg %c6,%c6,.Lall-.Lpg0(%r13)
lgr %r1,%r2
mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13)
stsch .Lschib-.Lpg0(%r13)
diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c
index fa0bdff1d413..d88db40bdf15 100644
--- a/arch/s390/kernel/sclp.c
+++ b/arch/s390/kernel/sclp.c
@@ -9,7 +9,11 @@
#include <asm/processor.h>
#include <asm/sclp.h>
+#define EVTYP_VT220MSG_MASK 0x00000040
+#define EVTYP_MSG_MASK 0x40000000
+
static char _sclp_work_area[4096] __aligned(PAGE_SIZE);
+static bool have_vt220, have_linemode;
static void _sclp_wait_int(void)
{
@@ -21,7 +25,7 @@ static void _sclp_wait_int(void)
__ctl_load(cr0_new, 0, 0);
psw_ext_save = S390_lowcore.external_new_psw;
- psw_mask = __extract_psw() & (PSW_MASK_EA | PSW_MASK_BA);
+ psw_mask = __extract_psw();
S390_lowcore.external_new_psw.mask = psw_mask;
psw_wait.mask = psw_mask | PSW_MASK_EXT | PSW_MASK_WAIT;
S390_lowcore.ext_int_code = 0;
@@ -68,7 +72,7 @@ static int _sclp_setup(int disable)
0x00, 0x1c,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x04,
- 0x80, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x80, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
unsigned int *masks;
@@ -82,13 +86,13 @@ static int _sclp_setup(int disable)
rc = _sclp_servc(0x00780005, _sclp_work_area);
if (rc)
return rc;
- if ((masks[0] & masks[3]) != masks[0] ||
- (masks[1] & masks[2]) != masks[1])
- return -EIO;
+ have_vt220 = masks[2] & EVTYP_VT220MSG_MASK;
+ have_linemode = masks[2] & EVTYP_MSG_MASK;
return 0;
}
-static int _sclp_print(const char *str)
+/* Output multi-line text using SCLP Message interface. */
+static void _sclp_print_lm(const char *str)
{
static unsigned char write_head[] = {
/* sccb header */
@@ -143,18 +147,49 @@ static int _sclp_print(const char *str)
} while (ch != 0);
/* SCLP write data */
- return _sclp_servc(0x00760005, _sclp_work_area);
+ _sclp_servc(0x00760005, _sclp_work_area);
}
-int _sclp_print_early(const char *str)
+/* Output multi-line text (plus a newline) using SCLP VT220
+ * interface.
+ */
+static void _sclp_print_vt220(const char *str)
{
- int rc;
+ static unsigned char const write_head[] = {
+ /* sccb header */
+ 0x00, 0x0e,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ /* evbuf header */
+ 0x00, 0x06,
+ 0x1a, 0x00, 0x00, 0x00,
+ };
+ size_t len = strlen(str);
- rc = _sclp_setup(0);
- if (rc)
- return rc;
- rc = _sclp_print(str);
- if (rc)
- return rc;
- return _sclp_setup(1);
+ if (sizeof(write_head) + len >= sizeof(_sclp_work_area))
+ len = sizeof(_sclp_work_area) - sizeof(write_head) - 1;
+
+ memcpy(_sclp_work_area, write_head, sizeof(write_head));
+ memcpy(_sclp_work_area + sizeof(write_head), str, len);
+ _sclp_work_area[sizeof(write_head) + len] = '\n';
+
+ /* Update length fields in evbuf and sccb headers */
+ *(unsigned short *)(_sclp_work_area + 8) += len + 1;
+ *(unsigned short *)(_sclp_work_area + 0) += len + 1;
+
+ /* SCLP write data */
+ (void)_sclp_servc(0x00760005, _sclp_work_area);
+}
+
+/* Output one or more lines of text on the SCLP console (VT220 and /
+ * or line-mode). All lines get terminated; no need for a trailing LF.
+ */
+void _sclp_print_early(const char *str)
+{
+ if (_sclp_setup(0) != 0)
+ return;
+ if (have_linemode)
+ _sclp_print_lm(str);
+ if (have_vt220)
+ _sclp_print_vt220(str);
+ _sclp_setup(1);
}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index ce0cbd6ba7ca..d3f9688f26b5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -80,6 +80,8 @@ EXPORT_SYMBOL(console_irq);
unsigned long elf_hwcap __read_mostly = 0;
char elf_platform[ELF_PLATFORM_SIZE];
+unsigned long int_hwcap = 0;
+
int __initdata memory_end_set;
unsigned long __initdata memory_end;
unsigned long __initdata max_physmem_end;
@@ -97,7 +99,7 @@ unsigned long MODULES_VADDR;
unsigned long MODULES_END;
/* An array with a pointer to the lowcore of every CPU. */
-struct _lowcore *lowcore_ptr[NR_CPUS];
+struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);
/*
@@ -291,33 +293,29 @@ void *restart_stack __attribute__((__section__(".data")));
static void __init setup_lowcore(void)
{
- struct _lowcore *lc;
+ struct lowcore *lc;
/*
* Setup lowcore for boot cpu
*/
- BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096);
+ BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096);
lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
lc->restart_psw.mask = PSW_KERNEL_BITS;
- lc->restart_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+ lc->restart_psw.addr = (unsigned long) restart_int_handler;
lc->external_new_psw.mask = PSW_KERNEL_BITS |
PSW_MASK_DAT | PSW_MASK_MCHECK;
- lc->external_new_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
+ lc->external_new_psw.addr = (unsigned long) ext_int_handler;
lc->svc_new_psw.mask = PSW_KERNEL_BITS |
PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
- lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
+ lc->svc_new_psw.addr = (unsigned long) system_call;
lc->program_new_psw.mask = PSW_KERNEL_BITS |
PSW_MASK_DAT | PSW_MASK_MCHECK;
- lc->program_new_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
+ lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
- lc->mcck_new_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
+ lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
lc->io_new_psw.mask = PSW_KERNEL_BITS |
PSW_MASK_DAT | PSW_MASK_MCHECK;
- lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
+ lc->io_new_psw.addr = (unsigned long) io_int_handler;
lc->clock_comparator = -1ULL;
lc->kernel_stack = ((unsigned long) &init_thread_union)
+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
@@ -329,6 +327,7 @@ static void __init setup_lowcore(void)
+ PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->current_task = (unsigned long) init_thread_union.thread_info.task;
lc->thread_info = (unsigned long) &init_thread_union;
+ lc->lpp = LPP_MAGIC;
lc->machine_flags = S390_lowcore.machine_flags;
lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
@@ -376,17 +375,17 @@ static void __init setup_lowcore(void)
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource bss_resource = {
.name = "Kernel bss",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource __initdata *standard_resources[] = {
@@ -410,7 +409,7 @@ static void __init setup_resources(void)
for_each_memblock(memory, reg) {
res = alloc_bootmem_low(sizeof(*res));
- res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM";
res->start = reg->base;
@@ -661,15 +660,6 @@ static void __init reserve_kernel(void)
#endif
}
-static void __init reserve_elfcorehdr(void)
-{
-#ifdef CONFIG_CRASH_DUMP
- if (is_kdump_kernel())
- memblock_reserve(elfcorehdr_addr - OLDMEM_BASE,
- PAGE_ALIGN(elfcorehdr_size));
-#endif
-}
-
static void __init setup_memory(void)
{
struct memblock_region *reg;
@@ -764,9 +754,6 @@ static int __init setup_hwcaps(void)
get_cpu_id(&cpu_id);
add_device_randomness(&cpu_id, sizeof(cpu_id));
switch (cpu_id.machine) {
- case 0x9672:
- strcpy(elf_platform, "g5");
- break;
case 0x2064:
case 0x2066:
default: /* Use "z900" as default for 64 bit kernels. */
@@ -793,9 +780,17 @@ static int __init setup_hwcaps(void)
strcpy(elf_platform, "zEC12");
break;
case 0x2964:
+ case 0x2965:
strcpy(elf_platform, "z13");
break;
}
+
+ /*
+ * Virtualization support HWCAP_INT_SIE is bit 0.
+ */
+ if (sclp.has_sief2)
+ int_hwcap |= HWCAP_INT_SIE;
+
return 0;
}
arch_initcall(setup_hwcaps);
@@ -844,6 +839,11 @@ void __init setup_arch(char **cmdline_p)
init_mm.brk = (unsigned long) &_end;
parse_early_param();
+#ifdef CONFIG_CRASH_DUMP
+ /* Deactivate elfcorehdr= kernel parameter */
+ elfcorehdr_addr = ELFCORE_ADDR_MAX;
+#endif
+
os_info_init();
setup_ipl();
@@ -852,7 +852,6 @@ void __init setup_arch(char **cmdline_p)
reserve_oldmem();
reserve_kernel();
reserve_initrd();
- reserve_elfcorehdr();
memblock_allow_resize();
/* Get information about *all* installed memory */
@@ -873,11 +872,13 @@ void __init setup_arch(char **cmdline_p)
check_initrd();
reserve_crashkernel();
+#ifdef CONFIG_CRASH_DUMP
/*
* Be aware that smp_save_dump_cpus() triggers a system reset.
* Therefore CPU and device initialization should be done afterwards.
*/
smp_save_dump_cpus();
+#endif
setup_resources();
setup_vmcoreinfo();
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 028cc46cb82a..d82562cf0a0e 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -331,13 +331,13 @@ static int setup_frame(int sig, struct k_sigaction *ka,
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
if (ka->sa.sa_flags & SA_RESTORER) {
- restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
+ restorer = (unsigned long) ka->sa.sa_restorer;
} else {
/* Signal frame without vector registers are short ! */
__u16 __user *svc = (void __user *) frame + frame_size - 2;
if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
return -EFAULT;
- restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+ restorer = (unsigned long) svc;
}
/* Set up registers for signal handler */
@@ -347,7 +347,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
(PSW_USER_BITS & PSW_MASK_ASC) |
(regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+ regs->psw.addr = (unsigned long) ka->sa.sa_handler;
regs->gprs[2] = sig;
regs->gprs[3] = (unsigned long) &frame->sc;
@@ -394,13 +394,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
if (ksig->ka.sa.sa_flags & SA_RESTORER) {
- restorer = (unsigned long)
- ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE;
+ restorer = (unsigned long) ksig->ka.sa.sa_restorer;
} else {
__u16 __user *svc = &frame->svc_insn;
if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
return -EFAULT;
- restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+ restorer = (unsigned long) svc;
}
/* Create siginfo on the signal stack */
@@ -426,7 +425,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
(PSW_USER_BITS & PSW_MASK_ASC) |
(regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE;
+ regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler;
regs->gprs[2] = ksig->sig;
regs->gprs[3] = (unsigned long) &frame->info;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 9062df575afe..40a6b4f9c36c 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -64,8 +64,9 @@ enum {
static DEFINE_PER_CPU(struct cpu *, cpu_device);
struct pcpu {
- struct _lowcore *lowcore; /* lowcore page(s) for the cpu */
+ struct lowcore *lowcore; /* lowcore page(s) for the cpu */
unsigned long ec_mask; /* bit mask for ec_xxx functions */
+ unsigned long ec_clk; /* sigp timestamp for ec_xxx */
signed char state; /* physical cpu state */
signed char polarization; /* physical polarization */
u16 address; /* physical cpu address */
@@ -80,6 +81,10 @@ EXPORT_SYMBOL(smp_cpu_mt_shift);
unsigned int smp_cpu_mtid;
EXPORT_SYMBOL(smp_cpu_mtid);
+#ifdef CONFIG_CRASH_DUMP
+__vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
+#endif
+
static unsigned int smp_max_threads __initdata = -1U;
static int __init early_nosmt(char *s)
@@ -105,8 +110,7 @@ DEFINE_MUTEX(smp_cpu_state_mutex);
/*
* Signal processor helper functions.
*/
-static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm,
- u32 *status)
+static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm)
{
int cc;
@@ -171,6 +175,7 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
return;
order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
+ pcpu->ec_clk = get_tod_clock_fast();
pcpu_sigp_retry(pcpu, order, 0);
}
@@ -180,10 +185,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
unsigned long async_stack, panic_stack;
- struct _lowcore *lc;
+ struct lowcore *lc;
if (pcpu != &pcpu_devices[0]) {
- pcpu->lowcore = (struct _lowcore *)
+ pcpu->lowcore = (struct lowcore *)
__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
panic_stack = __get_free_page(GFP_KERNEL);
@@ -235,7 +240,7 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
- struct _lowcore *lc = pcpu->lowcore;
+ struct lowcore *lc = pcpu->lowcore;
if (MACHINE_HAS_TLB_LC)
cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
@@ -255,7 +260,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
{
- struct _lowcore *lc = pcpu->lowcore;
+ struct lowcore *lc = pcpu->lowcore;
struct thread_info *ti = task_thread_info(tsk);
lc->kernel_stack = (unsigned long) task_stack_page(tsk)
@@ -271,7 +276,7 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
{
- struct _lowcore *lc = pcpu->lowcore;
+ struct lowcore *lc = pcpu->lowcore;
lc->restart_stack = lc->kernel_stack;
lc->restart_fn = (unsigned long) func;
@@ -286,7 +291,7 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
void *data, unsigned long stack)
{
- struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
+ struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
unsigned long source_cpu = stap();
__load_psw_mask(PSW_KERNEL_BITS);
@@ -538,53 +543,24 @@ EXPORT_SYMBOL(smp_ctl_clear_bit);
#ifdef CONFIG_CRASH_DUMP
-static void __init __smp_store_cpu_state(struct save_area_ext *sa_ext,
- u16 address, int is_boot_cpu)
-{
- void *lc = (void *)(unsigned long) store_prefix();
- unsigned long vx_sa;
-
- if (is_boot_cpu) {
- /* Copy the registers of the boot CPU. */
- copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa),
- SAVE_AREA_BASE - PAGE_SIZE, 0);
- if (MACHINE_HAS_VX)
- save_vx_regs_safe(sa_ext->vx_regs);
- return;
- }
- /* Get the registers of a non-boot cpu. */
- __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL);
- memcpy_real(&sa_ext->sa, lc + SAVE_AREA_BASE, sizeof(sa_ext->sa));
- if (!MACHINE_HAS_VX)
- return;
- /* Get the VX registers */
- vx_sa = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!vx_sa)
- panic("could not allocate memory for VX save area\n");
- __pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL);
- memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs));
- memblock_free(vx_sa, PAGE_SIZE);
-}
-
int smp_store_status(int cpu)
{
- unsigned long vx_sa;
- struct pcpu *pcpu;
+ struct pcpu *pcpu = pcpu_devices + cpu;
+ unsigned long pa;
- pcpu = pcpu_devices + cpu;
- if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
- 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
+ pa = __pa(&pcpu->lowcore->floating_pt_save_area);
+ if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
+ pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
if (!MACHINE_HAS_VX)
return 0;
- vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
- __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
- vx_sa, NULL);
+ pa = __pa(pcpu->lowcore->vector_save_area_addr);
+ if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
+ pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
+ return -EIO;
return 0;
}
-#endif /* CONFIG_CRASH_DUMP */
-
/*
* Collect CPU state of the previous, crashed system.
* There are four cases:
@@ -593,7 +569,7 @@ int smp_store_status(int cpu)
* The state for all CPUs except the boot CPU needs to be collected
* with sigp stop-and-store-status. The boot CPU state is located in
* the absolute lowcore of the memory stored in the HSA. The zcore code
- * will allocate the save area and copy the boot CPU state from the HSA.
+ * will copy the boot CPU state from the HSA.
* 2) stand-alone kdump for SCSI (zfcp dump with swapped memory)
* condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
* The state for all CPUs except the boot CPU needs to be collected
@@ -608,55 +584,76 @@ int smp_store_status(int cpu)
* stored the registers of the boot CPU in the memory of the old system.
* 4) kdump and the old kernel stored the CPU state
* condition: OLDMEM_BASE != NULL && is_kdump_kernel()
- * The state of all CPUs is stored in ELF sections in the memory of the
- * old system. The ELF sections are picked up by the crash_dump code
- * via elfcorehdr_addr.
+ * This case does not exist for s390 anymore, setup_arch explicitly
+ * deactivates the elfcorehdr= kernel parameter
*/
+static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr,
+ bool is_boot_cpu, unsigned long page)
+{
+ __vector128 *vxrs = (__vector128 *) page;
+
+ if (is_boot_cpu)
+ vxrs = boot_cpu_vector_save_area;
+ else
+ __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, page);
+ save_area_add_vxrs(sa, vxrs);
+}
+
+static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr,
+ bool is_boot_cpu, unsigned long page)
+{
+ void *regs = (void *) page;
+
+ if (is_boot_cpu)
+ copy_oldmem_kernel(regs, (void *) __LC_FPREGS_SAVE_AREA, 512);
+ else
+ __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, page);
+ save_area_add_regs(sa, regs);
+}
+
void __init smp_save_dump_cpus(void)
{
-#ifdef CONFIG_CRASH_DUMP
- int addr, cpu, boot_cpu_addr, max_cpu_addr;
- struct save_area_ext *sa_ext;
+ int addr, boot_cpu_addr, max_cpu_addr;
+ struct save_area *sa;
+ unsigned long page;
bool is_boot_cpu;
- if (is_kdump_kernel())
- /* Previous system stored the CPU states. Nothing to do. */
- return;
if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP))
/* No previous system present, normal boot. */
return;
+ /* Allocate a page as dumping area for the store status sigps */
+ page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
/* Set multi-threading state to the previous system. */
pcpu_set_smt(sclp.mtid_prev);
- max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
- for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) {
- if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) ==
- SIGP_CC_NOT_OPERATIONAL)
- continue;
- cpu += 1;
- }
- dump_save_areas.areas = (void *)memblock_alloc(sizeof(void *) * cpu, 8);
- dump_save_areas.count = cpu;
boot_cpu_addr = stap();
- for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) {
- if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) ==
+ max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
+ for (addr = 0; addr <= max_cpu_addr; addr++) {
+ if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
SIGP_CC_NOT_OPERATIONAL)
continue;
- sa_ext = (void *) memblock_alloc(sizeof(*sa_ext), 8);
- dump_save_areas.areas[cpu] = sa_ext;
- if (!sa_ext)
- panic("could not allocate memory for save area\n");
is_boot_cpu = (addr == boot_cpu_addr);
- cpu += 1;
- if (is_boot_cpu && !OLDMEM_BASE)
- /* Skip boot CPU for standard zfcp dump. */
- continue;
- /* Get state for this CPU. */
- __smp_store_cpu_state(sa_ext, addr, is_boot_cpu);
+ /* Allocate save area */
+ sa = save_area_alloc(is_boot_cpu);
+ if (!sa)
+ panic("could not allocate memory for save area\n");
+ if (MACHINE_HAS_VX)
+ /* Get the vector registers */
+ smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
+ /*
+ * For a zfcp dump OLDMEM_BASE == NULL and the registers
+ * of the boot CPU are stored in the HSA. To retrieve
+ * these registers an SCLP request is required which is
+ * done by drivers/s390/char/zcore.c:init_cpu_info()
+ */
+ if (!is_boot_cpu || OLDMEM_BASE)
+ /* Get the CPU registers */
+ smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
}
+ memblock_free(page, PAGE_SIZE);
diag308_reset();
pcpu_set_smt(0);
-#endif /* CONFIG_CRASH_DUMP */
}
+#endif /* CONFIG_CRASH_DUMP */
void smp_cpu_set_polarization(int cpu, int val)
{
@@ -680,7 +677,7 @@ static struct sclp_core_info *smp_get_core_info(void)
for (address = 0;
address < (SCLP_MAX_CORES << smp_cpu_mt_shift);
address += (1U << smp_cpu_mt_shift)) {
- if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) ==
+ if (__pcpu_sigp_relax(address, SIGP_SENSE, 0) ==
SIGP_CC_NOT_OPERATIONAL)
continue;
info->core[info->configured].core_id =
@@ -801,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid)
set_cpu_online(smp_processor_id(), true);
inc_irq_stat(CPU_RST);
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/* Upping and downing of CPUs */
@@ -924,7 +921,7 @@ void __init smp_prepare_boot_cpu(void)
pcpu->state = CPU_STATE_CONFIGURED;
pcpu->address = stap();
- pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
+ pcpu->lowcore = (struct lowcore *)(unsigned long) store_prefix();
S390_lowcore.percpu_offset = __per_cpu_offset[0];
smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
set_cpu_present(0, true);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 1785cd82253c..44f84b23d4e5 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -10,87 +10,64 @@
#include <linux/kallsyms.h>
#include <linux/module.h>
-static unsigned long save_context_stack(struct stack_trace *trace,
- unsigned long sp,
- unsigned long low,
- unsigned long high,
- int savesched)
+static int __save_address(void *data, unsigned long address, int nosched)
{
- struct stack_frame *sf;
- struct pt_regs *regs;
- unsigned long addr;
+ struct stack_trace *trace = data;
- while(1) {
- sp &= PSW_ADDR_INSN;
- if (sp < low || sp > high)
- return sp;
- sf = (struct stack_frame *)sp;
- while(1) {
- addr = sf->gprs[8] & PSW_ADDR_INSN;
- if (!trace->skip)
- trace->entries[trace->nr_entries++] = addr;
- else
- trace->skip--;
- if (trace->nr_entries >= trace->max_entries)
- return sp;
- low = sp;
- sp = sf->back_chain & PSW_ADDR_INSN;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *)sp;
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long)(sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *)sp;
- addr = regs->psw.addr & PSW_ADDR_INSN;
- if (savesched || !in_sched_functions(addr)) {
- if (!trace->skip)
- trace->entries[trace->nr_entries++] = addr;
- else
- trace->skip--;
- }
- if (trace->nr_entries >= trace->max_entries)
- return sp;
- low = sp;
- sp = regs->gprs[15];
+ if (nosched && in_sched_functions(address))
+ return 0;
+ if (trace->skip > 0) {
+ trace->skip--;
+ return 0;
}
+ if (trace->nr_entries < trace->max_entries) {
+ trace->entries[trace->nr_entries++] = address;
+ return 0;
+ }
+ return 1;
+}
+
+static int save_address(void *data, unsigned long address)
+{
+ return __save_address(data, address, 0);
+}
+
+static int save_address_nosched(void *data, unsigned long address)
+{
+ return __save_address(data, address, 1);
}
void save_stack_trace(struct stack_trace *trace)
{
- register unsigned long sp asm ("15");
- unsigned long orig_sp, new_sp;
+ unsigned long sp;
- orig_sp = sp & PSW_ADDR_INSN;
- new_sp = save_context_stack(trace, orig_sp,
- S390_lowcore.panic_stack - PAGE_SIZE,
- S390_lowcore.panic_stack, 1);
- if (new_sp != orig_sp)
- return;
- new_sp = save_context_stack(trace, new_sp,
- S390_lowcore.async_stack - ASYNC_SIZE,
- S390_lowcore.async_stack, 1);
- if (new_sp != orig_sp)
- return;
- save_context_stack(trace, new_sp,
- S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE, 1);
+ sp = current_stack_pointer();
+ dump_trace(save_address, trace, NULL, sp);
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
- unsigned long sp, low, high;
+ unsigned long sp;
- sp = tsk->thread.ksp & PSW_ADDR_INSN;
- low = (unsigned long) task_stack_page(tsk);
- high = (unsigned long) task_pt_regs(tsk);
- save_context_stack(trace, sp, low, high, 0);
+ sp = tsk->thread.ksp;
+ if (tsk == current)
+ sp = current_stack_pointer();
+ dump_trace(save_address_nosched, trace, tsk, sp);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+ unsigned long sp;
+
+ sp = kernel_stack_pointer(regs);
+ dump_trace(save_address, trace, NULL, sp);
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_regs);
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 8c56929c8d82..9b59e6212d8f 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -382,3 +382,7 @@ SYSCALL(sys_sendmsg,compat_sys_sendmsg) /* 370 */
SYSCALL(sys_recvfrom,compat_sys_recvfrom)
SYSCALL(sys_recvmsg,compat_sys_recvmsg)
SYSCALL(sys_shutdown,sys_shutdown)
+SYSCALL(sys_mlock2,compat_sys_mlock2)
+SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
+SYSCALL(sys_preadv2,compat_sys_preadv2)
+SYSCALL(sys_pwritev2,compat_sys_pwritev2)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 99babea026ca..f7dba3887a54 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -111,8 +111,7 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
{
- static int max_mnest;
- int i, rc;
+ int i;
seq_putc(m, '\n');
if (!MACHINE_HAS_TOPOLOGY)
@@ -123,7 +122,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
for (i = 0; i < TOPOLOGY_NR_MAG; i++)
seq_printf(m, " %d", info->mag[i]);
seq_putc(m, '\n');
-#ifdef CONFIG_SCHED_MC
+#ifdef CONFIG_SCHED_TOPOLOGY
store_topology(info);
seq_printf(m, "CPU Topology SW: ");
for (i = 0; i < TOPOLOGY_NR_MAG; i++)
@@ -145,6 +144,10 @@ static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info)
seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured);
seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby);
seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved);
+ if (info->mt_installed) {
+ seq_printf(m, "CPUs G-MTID: %d\n", info->mt_gtid);
+ seq_printf(m, "CPUs S-MTID: %d\n", info->mt_stid);
+ }
/*
* Sigh 2. According to the specification the alternate
* capability field is a 32 bit floating point number
@@ -194,13 +197,10 @@ static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info)
seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved);
seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated);
seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared);
- if (info->mt_installed & 0x80) {
- seq_printf(m, "LPAR CPUs G-MTID: %d\n",
- info->mt_general & 0x1f);
- seq_printf(m, "LPAR CPUs S-MTID: %d\n",
- info->mt_installed & 0x1f);
- seq_printf(m, "LPAR CPUs PS-MTID: %d\n",
- info->mt_psmtid & 0x1f);
+ if (info->mt_installed) {
+ seq_printf(m, "LPAR CPUs G-MTID: %d\n", info->mt_gtid);
+ seq_printf(m, "LPAR CPUs S-MTID: %d\n", info->mt_stid);
+ seq_printf(m, "LPAR CPUs PS-MTID: %d\n", info->mt_psmtid);
}
}
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 99f84ac31307..9409d32f285e 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -499,8 +499,7 @@ static void etr_reset(void)
if (etr_port0_online && etr_port1_online)
set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
} else if (etr_port0_online || etr_port1_online) {
- pr_warning("The real or virtual hardware system does "
- "not provide an ETR interface\n");
+ pr_warn("The real or virtual hardware system does not provide an ETR interface\n");
etr_port0_online = etr_port1_online = 0;
}
}
@@ -1433,7 +1432,7 @@ device_initcall(etr_init_sysfs);
/*
* Server Time Protocol (STP) code.
*/
-static int stp_online;
+static bool stp_online;
static struct stp_sstpi stp_info;
static void *stp_page;
@@ -1444,11 +1443,7 @@ static struct timer_list stp_timer;
static int __init early_parse_stp(char *p)
{
- if (strncmp(p, "off", 3) == 0)
- stp_online = 0;
- else if (strncmp(p, "on", 2) == 0)
- stp_online = 1;
- return 0;
+ return kstrtobool(p, &stp_online);
}
early_param("stp", early_parse_stp);
@@ -1464,8 +1459,7 @@ static void __init stp_reset(void)
if (rc == 0)
set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
else if (stp_online) {
- pr_warning("The real or virtual hardware system does "
- "not provide an STP interface\n");
+ pr_warn("The real or virtual hardware system does not provide an STP interface\n");
free_page((unsigned long) stp_page);
stp_page = NULL;
stp_online = 0;
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 40b8102fdadb..64298a867589 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -37,7 +37,7 @@ static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
-static int topology_enabled = 1;
+static bool topology_enabled = true;
static DECLARE_WORK(topology_work, topology_work_fn);
/*
@@ -444,10 +444,7 @@ static const struct cpumask *cpu_book_mask(int cpu)
static int __init early_parse_topology(char *p)
{
- if (strncmp(p, "off", 3))
- return 0;
- topology_enabled = 0;
- return 0;
+ return kstrtobool(p, &topology_enabled);
}
early_param("topology", early_parse_topology);
diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c
index 73239bb576c4..dde7654f5c68 100644
--- a/arch/s390/kernel/trace.c
+++ b/arch/s390/kernel/trace.c
@@ -9,20 +9,23 @@
#define CREATE_TRACE_POINTS
#include <asm/trace/diag.h>
-EXPORT_TRACEPOINT_SYMBOL(diagnose);
+EXPORT_TRACEPOINT_SYMBOL(s390_diagnose);
static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth);
-void trace_diagnose_norecursion(int diag_nr)
+void trace_s390_diagnose_norecursion(int diag_nr)
{
unsigned long flags;
unsigned int *depth;
+ /* Avoid lockdep recursion. */
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ return;
local_irq_save(flags);
depth = this_cpu_ptr(&diagnose_trace_depth);
if (*depth == 0) {
(*depth)++;
- trace_diagnose(diag_nr);
+ trace_s390_diagnose(diag_nr);
(*depth)--;
}
local_irq_restore(flags);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 1b18118bbc06..dd97a3e8a34a 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -22,8 +22,6 @@
#include <asm/fpu/api.h>
#include "entry.h"
-int show_unhandled_signals = 1;
-
static inline void __user *get_trap_ip(struct pt_regs *regs)
{
unsigned long address;
@@ -32,23 +30,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
address = *(unsigned long *)(current->thread.trap_tdb + 24);
else
address = regs->psw.addr;
- return (void __user *)
- ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
-}
-
-static inline void report_user_fault(struct pt_regs *regs, int signr)
-{
- if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
- return;
- if (!unhandled_signal(current, signr))
- return;
- if (!printk_ratelimit())
- return;
- printk("User process fault: interruption code %04x ilc:%d ",
- regs->int_code & 0xffff, regs->int_code >> 17);
- print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
- printk("\n");
- show_regs(regs);
+ return (void __user *) (address - (regs->int_code >> 16));
}
int is_valid_bugaddr(unsigned long addr)
@@ -66,16 +48,16 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
info.si_code = si_code;
info.si_addr = get_trap_ip(regs);
force_sig_info(si_signo, &info, current);
- report_user_fault(regs, si_signo);
+ report_user_fault(regs, si_signo, 0);
} else {
const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+ fixup = search_exception_tables(regs->psw.addr);
if (fixup)
- regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+ regs->psw.addr = extable_fixup(fixup);
else {
enum bug_trap_type btt;
- btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs);
+ btt = report_bug(regs->psw.addr, regs);
if (btt == BUG_TRAP_TYPE_WARN)
return;
die(regs, str);
@@ -112,7 +94,7 @@ NOKPROBE_SYMBOL(do_per_trap);
void default_trap_handler(struct pt_regs *regs)
{
if (user_mode(regs)) {
- report_user_fault(regs, SIGSEGV);
+ report_user_fault(regs, SIGSEGV, 0);
do_exit(SIGSEGV);
} else
die(regs, "Unknown program exception");
@@ -260,11 +242,8 @@ void vector_exception(struct pt_regs *regs)
void data_exception(struct pt_regs *regs)
{
- __u16 __user *location;
int signal = 0;
- location = get_trap_ip(regs);
-
save_fpu_regs();
if (current->thread.fpu.fpc & FPC_DXC_MASK)
signal = SIGFPE;
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 59eddb0e1a3e..94495cac8be3 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -80,7 +80,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data;
/*
* Setup vdso data page.
*/
-static void vdso_init_data(struct vdso_data *vd)
+static void __init vdso_init_data(struct vdso_data *vd)
{
vd->ectg_available = test_facility(31);
}
@@ -90,9 +90,10 @@ static void vdso_init_data(struct vdso_data *vd)
*/
#define SEGMENT_ORDER 2
-int vdso_alloc_per_cpu(struct _lowcore *lowcore)
+int vdso_alloc_per_cpu(struct lowcore *lowcore)
{
unsigned long segment_table, page_table, page_frame;
+ struct vdso_per_cpu_data *vd;
u32 *psal, *aste;
int i;
@@ -107,6 +108,12 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore)
if (!segment_table || !page_table || !page_frame)
goto out;
+ /* Initialize per-cpu vdso data page */
+ vd = (struct vdso_per_cpu_data *) page_frame;
+ vd->cpu_nr = lowcore->cpu_nr;
+ vd->node_id = cpu_to_node(vd->cpu_nr);
+
+ /* Set up access register mode page table */
clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
PAGE_SIZE << SEGMENT_ORDER);
clear_table((unsigned long *) page_table, _PAGE_INVALID,
@@ -138,7 +145,7 @@ out:
return -ENOMEM;
}
-void vdso_free_per_cpu(struct _lowcore *lowcore)
+void vdso_free_per_cpu(struct lowcore *lowcore)
{
unsigned long segment_table, page_table, page_frame;
u32 *psal, *aste;
@@ -163,7 +170,7 @@ static void vdso_init_cr5(void)
if (!vdso_enabled)
return;
- cr5 = offsetof(struct _lowcore, paste);
+ cr5 = offsetof(struct lowcore, paste);
__ctl_load(cr5, 5, 5);
}
@@ -299,8 +306,6 @@ static int __init vdso_init(void)
get_page(virt_to_page(vdso_data));
- smp_mb();
-
return 0;
}
early_initcall(vdso_init);
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index ee8a18e50a25..f9c459586649 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -1,6 +1,6 @@
# List of files in the vdso, has to be asm only for now
-obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o
+obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
# Build rules
diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S
new file mode 100644
index 000000000000..c1ed0b72030f
--- /dev/null
+++ b/arch/s390/kernel/vdso32/getcpu.S
@@ -0,0 +1,43 @@
+/*
+ * Userland implementation of getcpu() for 32 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+
+ .text
+ .align 4
+ .globl __kernel_getcpu
+ .type __kernel_getcpu,@function
+__kernel_getcpu:
+ .cfi_startproc
+ ear %r1,%a4
+ lhi %r4,1
+ sll %r4,24
+ sar %a4,%r4
+ la %r4,0
+ epsw %r0,0
+ sacf 512
+ l %r5,__VDSO_CPU_NR(%r4)
+ l %r4,__VDSO_NODE_ID(%r4)
+ tml %r0,0x4000
+ jo 1f
+ tml %r0,0x8000
+ jno 0f
+ sacf 256
+ j 1f
+0: sacf 0
+1: sar %a4,%r1
+ ltr %r2,%r2
+ jz 2f
+ st %r5,0(%r2)
+2: ltr %r3,%r3
+ jz 3f
+ st %r4,0(%r3)
+3: lhi %r2,0
+ br %r14
+ .cfi_endproc
+ .size __kernel_getcpu,.-__kernel_getcpu
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
index a8c379fa1247..8f048c2d6d13 100644
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -132,6 +132,7 @@ VERSION
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
+ __kernel_getcpu;
local: *;
};
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index c4b03f9ed228..058659c1b8cf 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -1,6 +1,6 @@
# List of files in the vdso, has to be asm only for now
-obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o
+obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
# Build rules
diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S
new file mode 100644
index 000000000000..4cbe98291931
--- /dev/null
+++ b/arch/s390/kernel/vdso64/getcpu.S
@@ -0,0 +1,42 @@
+/*
+ * Userland implementation of getcpu() for 64 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+
+ .text
+ .align 4
+ .globl __kernel_getcpu
+ .type __kernel_getcpu,@function
+__kernel_getcpu:
+ .cfi_startproc
+ ear %r1,%a4
+ llilh %r4,0x0100
+ sar %a4,%r4
+ la %r4,0
+ epsw %r0,0
+ sacf 512
+ l %r5,__VDSO_CPU_NR(%r4)
+ l %r4,__VDSO_NODE_ID(%r4)
+ tml %r0,0x4000
+ jo 1f
+ tml %r0,0x8000
+ jno 0f
+ sacf 256
+ j 1f
+0: sacf 0
+1: sar %a4,%r1
+ ltgr %r2,%r2
+ jz 2f
+ st %r5,0(%r2)
+2: ltgr %r3,%r3
+ jz 3f
+ st %r4,0(%r3)
+3: lghi %r2,0
+ br %r14
+ .cfi_endproc
+ .size __kernel_getcpu,.-__kernel_getcpu
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index 9f5979d102a9..f35455d497fe 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -132,6 +132,7 @@ VERSION
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
+ __kernel_getcpu;
local: *;
};
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 445657fe658c..0f41a8286378 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -28,6 +28,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
} :text = 0x0700
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 5fce52cf0e57..5ea5af3c7db7 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -29,6 +29,7 @@ config KVM
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select SRCU
+ select KVM_VFIO
---help---
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index b3b553469650..d42fa38c2429 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
# as published by the Free Software Foundation.
KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 5fbfb88f8477..1ea4095b67d7 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -14,6 +14,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/pgalloc.h>
+#include <asm/gmap.h>
#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
#include "trace.h"
@@ -155,10 +156,8 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tcpu;
int tid;
- int i;
tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
vcpu->stat.diagnose_9c++;
@@ -167,12 +166,9 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
if (tid == vcpu->vcpu_id)
return 0;
- kvm_for_each_vcpu(i, tcpu, kvm)
- if (tcpu->vcpu_id == tid) {
- kvm_vcpu_yield_to(tcpu);
- break;
- }
-
+ tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid);
+ if (tcpu)
+ kvm_vcpu_yield_to(tcpu);
return 0;
}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index a7559f7207df..66938d283b77 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -259,10 +259,14 @@ struct aste {
int ipte_lock_held(struct kvm_vcpu *vcpu)
{
- union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control;
+ if (vcpu->arch.sie_block->eca & 1) {
+ int rc;
- if (vcpu->arch.sie_block->eca & 1)
- return ic->kh != 0;
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0;
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+ return rc;
+ }
return vcpu->kvm->arch.ipte_lock_count != 0;
}
@@ -274,16 +278,20 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu)
vcpu->kvm->arch.ipte_lock_count++;
if (vcpu->kvm->arch.ipte_lock_count > 1)
goto out;
- ic = &vcpu->kvm->arch.sca->ipte_control;
+retry:
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(vcpu->kvm);
do {
old = READ_ONCE(*ic);
- while (old.k) {
+ if (old.k) {
+ read_unlock(&vcpu->kvm->arch.sca_lock);
cond_resched();
- old = READ_ONCE(*ic);
+ goto retry;
}
new = old;
new.k = 1;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ read_unlock(&vcpu->kvm->arch.sca_lock);
out:
mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
}
@@ -296,12 +304,14 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
vcpu->kvm->arch.ipte_lock_count--;
if (vcpu->kvm->arch.ipte_lock_count)
goto out;
- ic = &vcpu->kvm->arch.sca->ipte_control;
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(vcpu->kvm);
do {
old = READ_ONCE(*ic);
new = old;
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ read_unlock(&vcpu->kvm->arch.sca_lock);
wake_up(&vcpu->kvm->arch.ipte_wq);
out:
mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
@@ -311,24 +321,29 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu)
{
union ipte_control old, new, *ic;
- ic = &vcpu->kvm->arch.sca->ipte_control;
+retry:
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(vcpu->kvm);
do {
old = READ_ONCE(*ic);
- while (old.kg) {
+ if (old.kg) {
+ read_unlock(&vcpu->kvm->arch.sca_lock);
cond_resched();
- old = READ_ONCE(*ic);
+ goto retry;
}
new = old;
new.k = 1;
new.kh++;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
{
union ipte_control old, new, *ic;
- ic = &vcpu->kvm->arch.sca->ipte_control;
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(vcpu->kvm);
do {
old = READ_ONCE(*ic);
new = old;
@@ -336,6 +351,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
if (!new.kh)
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ read_unlock(&vcpu->kvm->arch.sca_lock);
if (!new.kh)
wake_up(&vcpu->kvm->arch.ipte_wq);
}
@@ -357,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu)
}
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
- int write)
+ enum gacc_mode mode)
{
union alet alet;
struct ale ale;
@@ -438,7 +454,7 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
}
}
- if (ale.fo == 1 && write)
+ if (ale.fo == 1 && mode == GACC_STORE)
return PGM_PROTECTION;
asce->val = aste.asce;
@@ -461,25 +477,28 @@ enum {
};
static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
- ar_t ar, int write)
+ ar_t ar, enum gacc_mode mode)
{
int rc;
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
struct trans_exc_code_bits *tec_bits;
memset(pgm, 0, sizeof(*pgm));
tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
- tec_bits->as = psw_bits(*psw).as;
+ tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+ tec_bits->as = psw.as;
- if (!psw_bits(*psw).t) {
+ if (!psw.t) {
asce->val = 0;
asce->r = 1;
return 0;
}
- switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
+ if (mode == GACC_IFETCH)
+ psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY;
+
+ switch (psw.as) {
case PSW_AS_PRIMARY:
asce->val = vcpu->arch.sie_block->gcr[1];
return 0;
@@ -490,7 +509,7 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
asce->val = vcpu->arch.sie_block->gcr[13];
return 0;
case PSW_AS_ACCREG:
- rc = ar_translation(vcpu, asce, ar, write);
+ rc = ar_translation(vcpu, asce, ar, mode);
switch (rc) {
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
@@ -522,7 +541,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
* @gva: guest virtual address
* @gpa: points to where guest physical (absolute) address should be stored
* @asce: effective asce
- * @write: indicates if access is a write access
+ * @mode: indicates the access mode to be used
*
* Translate a guest virtual address into a guest absolute address by means
* of dynamic address translation as specified by the architecture.
@@ -538,7 +557,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
*/
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
unsigned long *gpa, const union asce asce,
- int write)
+ enum gacc_mode mode)
{
union vaddress vaddr = {.addr = gva};
union raddress raddr = {.addr = gva};
@@ -683,7 +702,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
real_address:
raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
- if (write && dat_protection)
+ if (mode == GACC_STORE && dat_protection)
return PGM_PROTECTION;
if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
return PGM_ADDRESSING;
@@ -712,7 +731,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
unsigned long *pages, unsigned long nr_pages,
- const union asce asce, int write)
+ const union asce asce, enum gacc_mode mode)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -724,13 +743,13 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
while (nr_pages) {
ga = kvm_s390_logical_to_effective(vcpu, ga);
tec_bits->addr = ga >> PAGE_SHIFT;
- if (write && lap_enabled && is_low_address(ga)) {
+ if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) {
pgm->code = PGM_PROTECTION;
return pgm->code;
}
ga &= PAGE_MASK;
if (psw_bits(*psw).t) {
- rc = guest_translate(vcpu, ga, pages, asce, write);
+ rc = guest_translate(vcpu, ga, pages, asce, mode);
if (rc < 0)
return rc;
if (rc == PGM_PROTECTION)
@@ -752,7 +771,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
}
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
- unsigned long len, int write)
+ unsigned long len, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
unsigned long _len, nr_pages, gpa, idx;
@@ -764,7 +783,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
if (!len)
return 0;
- rc = get_vcpu_asce(vcpu, &asce, ar, write);
+ rc = get_vcpu_asce(vcpu, &asce, ar, mode);
if (rc)
return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
@@ -776,11 +795,11 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
need_ipte_lock = psw_bits(*psw).t && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write);
+ rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode);
for (idx = 0; idx < nr_pages && !rc; idx++) {
gpa = *(pages + idx) + (ga & ~PAGE_MASK);
_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
- if (write)
+ if (mode == GACC_STORE)
rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
else
rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
@@ -796,7 +815,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
}
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
- void *data, unsigned long len, int write)
+ void *data, unsigned long len, enum gacc_mode mode)
{
unsigned long _len, gpa;
int rc = 0;
@@ -804,7 +823,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
while (len && !rc) {
gpa = kvm_s390_real_to_abs(vcpu, gra);
_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
- if (write)
+ if (mode)
rc = write_guest_abs(vcpu, gpa, data, _len);
else
rc = read_guest_abs(vcpu, gpa, data, _len);
@@ -825,7 +844,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* has to take care of this.
*/
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
- unsigned long *gpa, int write)
+ unsigned long *gpa, enum gacc_mode mode)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -835,19 +854,19 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
gva = kvm_s390_logical_to_effective(vcpu, gva);
tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- rc = get_vcpu_asce(vcpu, &asce, ar, write);
+ rc = get_vcpu_asce(vcpu, &asce, ar, mode);
tec->addr = gva >> PAGE_SHIFT;
if (rc)
return rc;
if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
- if (write) {
+ if (mode == GACC_STORE) {
rc = pgm->code = PGM_PROTECTION;
return rc;
}
}
if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
- rc = guest_translate(vcpu, gva, gpa, asce, write);
+ rc = guest_translate(vcpu, gva, gpa, asce, mode);
if (rc > 0) {
if (rc == PGM_PROTECTION)
tec->b61 = 1;
@@ -867,7 +886,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
* check_gva_range - test a range of guest virtual addresses for accessibility
*/
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
- unsigned long length, int is_write)
+ unsigned long length, enum gacc_mode mode)
{
unsigned long gpa;
unsigned long currlen;
@@ -876,7 +895,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
ipte_lock(vcpu);
while (length > 0 && !rc) {
currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
- rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write);
+ rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
gva += currlen;
length -= currlen;
}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index ef03726cc661..df0a79dd8159 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -155,16 +155,22 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
return kvm_read_guest(vcpu->kvm, gpa, data, len);
}
+enum gacc_mode {
+ GACC_FETCH,
+ GACC_STORE,
+ GACC_IFETCH,
+};
+
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
- ar_t ar, unsigned long *gpa, int write);
+ ar_t ar, unsigned long *gpa, enum gacc_mode mode);
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
- unsigned long length, int is_write);
+ unsigned long length, enum gacc_mode mode);
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
- unsigned long len, int write);
+ unsigned long len, enum gacc_mode mode);
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
- void *data, unsigned long len, int write);
+ void *data, unsigned long len, enum gacc_mode mode);
/**
* write_guest - copy data from kernel space to guest space
@@ -215,7 +221,7 @@ static inline __must_check
int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, ar, data, len, 1);
+ return access_guest(vcpu, ga, ar, data, len, GACC_STORE);
}
/**
@@ -235,7 +241,27 @@ static inline __must_check
int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, ar, data, len, 0);
+ return access_guest(vcpu, ga, ar, data, len, GACC_FETCH);
+}
+
+/**
+ * read_guest_instr - copy instruction data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from the current psw address (guest space) to @data (kernel
+ * space).
+ *
+ * The behaviour of read_guest_instr is identical to read_guest, except that
+ * instruction data will be read from primary space when in home-space or
+ * address-space mode.
+ */
+static inline __must_check
+int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len)
+{
+ return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len,
+ GACC_IFETCH);
}
/**
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 47518a324d75..e8c6843b9600 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -17,7 +17,7 @@
/*
* Extends the address range given by *start and *stop to include the address
* range starting with estart and the length len. Takes care of overflowing
- * intervals and tries to minimize the overall intervall size.
+ * intervals and tries to minimize the overall interval size.
*/
static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
{
@@ -72,7 +72,7 @@ static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
return;
/*
- * If the guest is not interrested in branching events, we can savely
+ * If the guest is not interested in branching events, we can safely
* limit them to the PER address range.
*/
if (!(*cr9 & PER_EVENT_BRANCH))
@@ -116,7 +116,7 @@ static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
*cr9 &= ~PER_CONTROL_ALTERATION;
*cr10 = 0;
- *cr11 = PSW_ADDR_INSN;
+ *cr11 = -1UL;
} else {
*cr9 &= ~PER_CONTROL_ALTERATION;
*cr9 |= PER_EVENT_STORE;
@@ -159,7 +159,7 @@ void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
vcpu->arch.sie_block->gcr[10] = 0;
- vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+ vcpu->arch.sie_block->gcr[11] = -1UL;
}
if (guestdbg_hw_bp_enabled(vcpu)) {
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index b4a5aa110cec..2e6b54e4d3f9 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -38,25 +38,37 @@ static const intercept_handler_t instruction_handlers[256] = {
[0xeb] = kvm_s390_handle_eb,
};
-void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
+u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
{
struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+ u8 ilen = 0;
- /* Use the length of the EXECUTE instruction if necessary */
- if (sie_block->icptstatus & 1) {
- ilc = (sie_block->icptstatus >> 4) & 0x6;
- if (!ilc)
- ilc = 4;
+ switch (vcpu->arch.sie_block->icptcode) {
+ case ICPT_INST:
+ case ICPT_INSTPROGI:
+ case ICPT_OPEREXC:
+ case ICPT_PARTEXEC:
+ case ICPT_IOINST:
+ /* instruction only stored for these icptcodes */
+ ilen = insn_length(vcpu->arch.sie_block->ipa >> 8);
+ /* Use the length of the EXECUTE instruction if necessary */
+ if (sie_block->icptstatus & 1) {
+ ilen = (sie_block->icptstatus >> 4) & 0x6;
+ if (!ilen)
+ ilen = 4;
+ }
+ break;
+ case ICPT_PROGI:
+ /* bit 1+2 of pgmilc are the ilc, so we directly get ilen */
+ ilen = vcpu->arch.sie_block->pgmilc & 0x6;
+ break;
}
- sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc);
+ return ilen;
}
static int handle_noop(struct kvm_vcpu *vcpu)
{
switch (vcpu->arch.sie_block->icptcode) {
- case 0x0:
- vcpu->stat.exit_null++;
- break;
case 0x10:
vcpu->stat.exit_external_request++;
break;
@@ -124,11 +136,13 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
-static void __extract_prog_irq(struct kvm_vcpu *vcpu,
- struct kvm_s390_pgm_info *pgm_info)
+static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
{
- memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
- pgm_info->code = vcpu->arch.sie_block->iprcc;
+ struct kvm_s390_pgm_info pgm_info = {
+ .code = vcpu->arch.sie_block->iprcc,
+ /* the PSW has already been rewound */
+ .flags = KVM_S390_PGM_FLAGS_NO_REWIND,
+ };
switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
case PGM_AFX_TRANSLATION:
@@ -141,7 +155,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
case PGM_PRIMARY_AUTHORITY:
case PGM_SECONDARY_AUTHORITY:
case PGM_SPACE_SWITCH:
- pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
break;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
@@ -149,7 +163,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
case PGM_ASTE_SEQUENCE:
case PGM_ASTE_VALIDITY:
case PGM_EXTENDED_AUTHORITY:
- pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+ pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
break;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
@@ -157,32 +171,33 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
case PGM_SEGMENT_TRANSLATION:
- pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
- pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
- pgm_info->op_access_id = vcpu->arch.sie_block->oai;
+ pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
+ pgm_info.op_access_id = vcpu->arch.sie_block->oai;
break;
case PGM_MONITOR:
- pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
- pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn;
+ pgm_info.mon_code = vcpu->arch.sie_block->tecmc;
break;
case PGM_VECTOR_PROCESSING:
case PGM_DATA:
- pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
+ pgm_info.data_exc_code = vcpu->arch.sie_block->dxc;
break;
case PGM_PROTECTION:
- pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
- pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+ pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
break;
default:
break;
}
if (vcpu->arch.sie_block->iprcc & PGM_PER) {
- pgm_info->per_code = vcpu->arch.sie_block->perc;
- pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
- pgm_info->per_address = vcpu->arch.sie_block->peraddr;
- pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
+ pgm_info.per_code = vcpu->arch.sie_block->perc;
+ pgm_info.per_atmid = vcpu->arch.sie_block->peratmid;
+ pgm_info.per_address = vcpu->arch.sie_block->peraddr;
+ pgm_info.per_access_id = vcpu->arch.sie_block->peraid;
}
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
/*
@@ -211,7 +226,6 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
static int handle_prog(struct kvm_vcpu *vcpu)
{
- struct kvm_s390_pgm_info pgm_info;
psw_t psw;
int rc;
@@ -237,8 +251,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
if (rc)
return rc;
- __extract_prog_irq(vcpu, &pgm_info);
- return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+ return inject_prog_on_prog_intercept(vcpu);
}
/**
@@ -305,7 +318,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
/* Make sure that the source is paged-in */
rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
- reg2, &srcaddr, 0);
+ reg2, &srcaddr, GACC_FETCH);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
@@ -314,14 +327,14 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
/* Make sure that the destination is paged-in */
rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
- reg1, &dstaddr, 1);
+ reg1, &dstaddr, GACC_STORE);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
if (rc != 0)
return rc;
- kvm_s390_rewind_psw(vcpu, 4);
+ kvm_s390_retry_instr(vcpu);
return 0;
}
@@ -338,8 +351,10 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
+ if (kvm_is_ucontrol(vcpu->kvm))
+ return -EOPNOTSUPP;
+
switch (vcpu->arch.sie_block->icptcode) {
- case 0x00:
case 0x10:
case 0x18:
return handle_noop(vcpu);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 373e32346d68..84efc2ba6a90 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -23,6 +23,7 @@
#include <asm/uaccess.h>
#include <asm/sclp.h>
#include <asm/isc.h>
+#include <asm/gmap.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "trace-s390.h"
@@ -34,6 +35,106 @@
#define PFAULT_DONE 0x0680
#define VIRTIO_PARAM 0x0d00
+/* handle external calls via sigp interpretation facility */
+static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
+{
+ int c, scn;
+
+ if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
+ return 0;
+
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ if (vcpu->kvm->arch.use_esca) {
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+ union esca_sigp_ctrl sigp_ctrl =
+ sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+
+ c = sigp_ctrl.c;
+ scn = sigp_ctrl.scn;
+ } else {
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+ union bsca_sigp_ctrl sigp_ctrl =
+ sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+
+ c = sigp_ctrl.c;
+ scn = sigp_ctrl.scn;
+ }
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+
+ if (src_id)
+ *src_id = scn;
+
+ return c;
+}
+
+static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
+{
+ int expect, rc;
+
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ if (vcpu->kvm->arch.use_esca) {
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+ union esca_sigp_ctrl *sigp_ctrl =
+ &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+ union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
+
+ new_val.scn = src_id;
+ new_val.c = 1;
+ old_val.c = 0;
+
+ expect = old_val.value;
+ rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
+ } else {
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+ union bsca_sigp_ctrl *sigp_ctrl =
+ &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+ union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
+
+ new_val.scn = src_id;
+ new_val.c = 1;
+ old_val.c = 0;
+
+ expect = old_val.value;
+ rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
+ }
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+
+ if (rc != expect) {
+ /* another external call is pending */
+ return -EBUSY;
+ }
+ atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+ return 0;
+}
+
+static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ int rc, expect;
+
+ atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ if (vcpu->kvm->arch.use_esca) {
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+ union esca_sigp_ctrl *sigp_ctrl =
+ &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+ union esca_sigp_ctrl old = *sigp_ctrl;
+
+ expect = old.value;
+ rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+ } else {
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+ union bsca_sigp_ctrl *sigp_ctrl =
+ &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+ union bsca_sigp_ctrl old = *sigp_ctrl;
+
+ expect = old.value;
+ rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+ }
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+ WARN_ON(rc != expect); /* cannot clear? */
+}
+
int psw_extint_disabled(struct kvm_vcpu *vcpu)
{
return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
@@ -82,8 +183,9 @@ static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
{
- return (vcpu->arch.sie_block->cputm >> 63) &&
- cpu_timer_interrupts_enabled(vcpu);
+ if (!cpu_timer_interrupts_enabled(vcpu))
+ return 0;
+ return kvm_s390_get_cpu_timer(vcpu) >> 63;
}
static inline int is_ioirq(unsigned long irq_type)
@@ -235,23 +337,6 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu)
set_intercept_indicators_stop(vcpu);
}
-static u16 get_ilc(struct kvm_vcpu *vcpu)
-{
- switch (vcpu->arch.sie_block->icptcode) {
- case ICPT_INST:
- case ICPT_INSTPROGI:
- case ICPT_OPEREXC:
- case ICPT_PARTEXEC:
- case ICPT_IOINST:
- /* last instruction only stored for these icptcodes */
- return insn_length(vcpu->arch.sie_block->ipa >> 8);
- case ICPT_PROGI:
- return vcpu->arch.sie_block->pgmilc;
- default:
- return 0;
- }
-}
-
static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -399,9 +484,9 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
rc = write_guest_lc(vcpu,
- offsetof(struct _lowcore, restart_old_psw),
+ offsetof(struct lowcore, restart_old_psw),
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
+ rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
return rc ? -EFAULT : 0;
@@ -488,7 +573,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_pgm_info pgm_info;
int rc = 0, nullifying = false;
- u16 ilc = get_ilc(vcpu);
+ u16 ilen;
spin_lock(&li->lock);
pgm_info = li->irq.pgm;
@@ -496,8 +581,9 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
memset(&li->irq.pgm, 0, sizeof(pgm_info));
spin_unlock(&li->lock);
- VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d",
- pgm_info.code, ilc);
+ ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK;
+ VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d",
+ pgm_info.code, ilen);
vcpu->stat.deliver_program_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
pgm_info.code, 0);
@@ -581,10 +667,11 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
(u8 *) __LC_PER_ACCESS_ID);
}
- if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST)
- kvm_s390_rewind_psw(vcpu, ilc);
+ if (nullifying && !(pgm_info.flags & KVM_S390_PGM_FLAGS_NO_REWIND))
+ kvm_s390_rewind_psw(vcpu, ilen);
- rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
+ /* bit 1+2 of the target are the ilc, so we can directly use ilen */
+ rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC);
rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
(u64 *) __LC_LAST_BREAK);
rc |= put_guest_lc(vcpu, pgm_info.code,
@@ -792,13 +879,11 @@ static const deliver_irq_t deliver_irq_funcs[] = {
int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
if (!sclp.has_sigpif)
return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
- return (sigp_ctrl & SIGP_CTRL_C) &&
- (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND);
+ return sca_ext_call_pending(vcpu, NULL);
}
int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
@@ -825,9 +910,35 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu);
}
+static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
+{
+ u64 now, cputm, sltime = 0;
+
+ if (ckc_interrupts_enabled(vcpu)) {
+ now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+ sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
+ /* already expired or overflow? */
+ if (!sltime || vcpu->arch.sie_block->ckc <= now)
+ return 0;
+ if (cpu_timer_interrupts_enabled(vcpu)) {
+ cputm = kvm_s390_get_cpu_timer(vcpu);
+ /* already expired? */
+ if (cputm >> 63)
+ return 0;
+ return min(sltime, tod_to_ns(cputm));
+ }
+ } else if (cpu_timer_interrupts_enabled(vcpu)) {
+ sltime = kvm_s390_get_cpu_timer(vcpu);
+ /* already expired? */
+ if (sltime >> 63)
+ return 0;
+ }
+ return sltime;
+}
+
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
{
- u64 now, sltime;
+ u64 sltime;
vcpu->stat.exit_wait_state++;
@@ -840,22 +951,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP; /* disabled wait */
}
- if (!ckc_interrupts_enabled(vcpu)) {
+ if (!ckc_interrupts_enabled(vcpu) &&
+ !cpu_timer_interrupts_enabled(vcpu)) {
VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
__set_cpu_idle(vcpu);
goto no_timer;
}
- now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
- sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
-
- /* underflow */
- if (vcpu->arch.sie_block->ckc < now)
+ sltime = __calculate_sltime(vcpu);
+ if (!sltime)
return 0;
__set_cpu_idle(vcpu);
hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
- VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime);
+ VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
no_timer:
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
@@ -868,13 +977,13 @@ no_timer:
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
{
- if (waitqueue_active(&vcpu->wq)) {
+ if (swait_active(&vcpu->wq)) {
/*
* The vcpu gave up the cpu voluntarily, mark it as a good
* yield-candidate.
*/
vcpu->preempted = true;
- wake_up_interruptible(&vcpu->wq);
+ swake_up(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -882,18 +991,16 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
- u64 now, sltime;
+ u64 sltime;
vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
- now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
- sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
+ sltime = __calculate_sltime(vcpu);
/*
* If the monotonic clock runs faster than the tod clock we might be
* woken up too early and have to go back to sleep to avoid deadlocks.
*/
- if (vcpu->arch.sie_block->ckc > now &&
- hrtimer_forward_now(timer, ns_to_ktime(sltime)))
+ if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime)))
return HRTIMER_RESTART;
kvm_s390_vcpu_wakeup(vcpu);
return HRTIMER_NORESTART;
@@ -909,9 +1016,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
memset(&li->irq, 0, sizeof(li->irq));
spin_unlock(&li->lock);
- /* clear pending external calls set by sigp interpretation facility */
- atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
- vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0;
+ sca_clear_ext_call(vcpu);
}
int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
@@ -963,8 +1068,16 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
irq->u.pgm.code, 0);
+ if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) {
+ /* auto detection if no valid ILC was given */
+ irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK;
+ irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu);
+ irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID;
+ }
+
if (irq->u.pgm.code == PGM_PER) {
li->irq.pgm.code |= PGM_PER;
+ li->irq.pgm.flags = irq->u.pgm.flags;
/* only modify PER related information */
li->irq.pgm.per_address = irq->u.pgm.per_address;
li->irq.pgm.per_code = irq->u.pgm.per_code;
@@ -973,6 +1086,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
} else if (!(irq->u.pgm.code & PGM_PER)) {
li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) |
irq->u.pgm.code;
+ li->irq.pgm.flags = irq->u.pgm.flags;
/* only modify non-PER information */
li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code;
li->irq.pgm.mon_code = irq->u.pgm.mon_code;
@@ -1003,21 +1117,6 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
return 0;
}
-static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id)
-{
- unsigned char new_val, old_val;
- uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
-
- new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK);
- old_val = *sigp_ctrl & ~SIGP_CTRL_C;
- if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) {
- /* another external call is pending */
- return -EBUSY;
- }
- atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
- return 0;
-}
-
static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -1030,12 +1129,11 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
src_id, 0);
/* sending vcpu invalid */
- if (src_id >= KVM_MAX_VCPUS ||
- kvm_get_vcpu(vcpu->kvm, src_id) == NULL)
+ if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
return -EINVAL;
if (sclp.has_sigpif)
- return __inject_extcall_sigpif(vcpu, src_id);
+ return sca_inject_ext_call(vcpu, src_id);
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
return -EBUSY;
@@ -1110,6 +1208,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
irq->u.emerg.code, 0);
+ /* sending vcpu invalid */
+ if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL)
+ return -EINVAL;
+
set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
@@ -2200,7 +2302,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li,
int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
{
- uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+ int scn;
unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
unsigned long pending_irqs;
@@ -2240,14 +2342,12 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
}
}
- if ((sigp_ctrl & SIGP_CTRL_C) &&
- (atomic_read(&vcpu->arch.sie_block->cpuflags) &
- CPUSTAT_ECALL_PEND)) {
+ if (sca_ext_call_pending(vcpu, &scn)) {
if (n + sizeof(irq) > len)
return -ENOBUFS;
memset(&irq, 0, sizeof(irq));
irq.type = KVM_S390_INT_EXTERNAL_CALL;
- irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK;
+ irq.u.extcall.code = scn;
if (copy_to_user(&buf[n], &irq, sizeof(irq)))
return -EFAULT;
n += sizeof(irq);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8fe2f1c722dc..668c087513e5 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -30,6 +30,7 @@
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
+#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
@@ -158,6 +159,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
kvm->arch.epoch -= *delta;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.sie_block->epoch -= *delta;
+ if (vcpu->arch.cputm_enabled)
+ vcpu->arch.cputm_start += *delta;
}
}
return NOTIFY_OK;
@@ -246,7 +249,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
- r = KVM_MAX_VCPUS;
+ r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
+ : KVM_S390_BSCA_CPU_SLOTS;
break;
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
@@ -257,6 +261,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_VECTOR_REGISTERS:
r = MACHINE_HAS_VX;
break;
+ case KVM_CAP_S390_RI:
+ r = test_facility(64);
+ break;
default:
r = 0;
}
@@ -270,19 +277,22 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm,
unsigned long address;
struct gmap *gmap = kvm->arch.gmap;
- down_read(&gmap->mm->mmap_sem);
/* Loop over all guest pages */
last_gfn = memslot->base_gfn + memslot->npages;
for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
address = gfn_to_hva_memslot(memslot, cur_gfn);
- if (gmap_test_and_clear_dirty(address, gmap))
+ if (test_and_clear_guest_dirty(gmap->mm, address))
mark_page_dirty(kvm, cur_gfn);
+ if (fatal_signal_pending(current))
+ return;
+ cond_resched();
}
- up_read(&gmap->mm->mmap_sem);
}
/* Section: vm related */
+static void sca_del_vcpu(struct kvm_vcpu *vcpu);
+
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
@@ -342,15 +352,33 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
r = 0;
break;
case KVM_CAP_S390_VECTOR_REGISTERS:
- if (MACHINE_HAS_VX) {
- set_kvm_facility(kvm->arch.model.fac->mask, 129);
- set_kvm_facility(kvm->arch.model.fac->list, 129);
+ mutex_lock(&kvm->lock);
+ if (atomic_read(&kvm->online_vcpus)) {
+ r = -EBUSY;
+ } else if (MACHINE_HAS_VX) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 129);
+ set_kvm_facility(kvm->arch.model.fac_list, 129);
r = 0;
} else
r = -EINVAL;
+ mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
r ? "(not available)" : "(success)");
break;
+ case KVM_CAP_S390_RI:
+ r = -EINVAL;
+ mutex_lock(&kvm->lock);
+ if (atomic_read(&kvm->online_vcpus)) {
+ r = -EBUSY;
+ } else if (test_facility(64)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 64);
+ set_kvm_facility(kvm->arch.model.fac_list, 64);
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
+ r ? "(not available)" : "(success)");
+ break;
case KVM_CAP_S390_USER_STSI:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
kvm->arch.user_stsi = 1;
@@ -371,8 +399,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att
case KVM_S390_VM_MEM_LIMIT_SIZE:
ret = 0;
VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
- kvm->arch.gmap->asce_end);
- if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
+ kvm->arch.mem_limit);
+ if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
ret = -EFAULT;
break;
default:
@@ -424,9 +452,17 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
if (get_user(new_limit, (u64 __user *)attr->addr))
return -EFAULT;
- if (new_limit > kvm->arch.gmap->asce_end)
+ if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
+ new_limit > kvm->arch.mem_limit)
return -E2BIG;
+ if (!new_limit)
+ return -EINVAL;
+
+ /* gmap_alloc takes last usable address */
+ if (new_limit != KVM_S390_NO_MEM_LIMIT)
+ new_limit -= 1;
+
ret = -EBUSY;
mutex_lock(&kvm->lock);
if (atomic_read(&kvm->online_vcpus) == 0) {
@@ -443,7 +479,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
}
}
mutex_unlock(&kvm->lock);
- VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
+ VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
+ VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
+ (void *) kvm->arch.gmap->asce);
break;
}
default:
@@ -617,7 +655,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
sizeof(struct cpuid));
kvm->arch.model.ibc = proc->ibc;
- memcpy(kvm->arch.model.fac->list, proc->fac_list,
+ memcpy(kvm->arch.model.fac_list, proc->fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
} else
ret = -EFAULT;
@@ -651,7 +689,8 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
}
memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
proc->ibc = kvm->arch.model.ibc;
- memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
+ memcpy(&proc->fac_list, kvm->arch.model.fac_list,
+ S390_ARCH_FAC_LIST_SIZE_BYTE);
if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
ret = -EFAULT;
kfree(proc);
@@ -671,7 +710,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
}
get_cpu_id((struct cpuid *) &mach->cpuid);
mach->ibc = sclp.ibc;
- memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
+ memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
@@ -1020,7 +1059,7 @@ static int kvm_s390_apxa_installed(void)
u8 config[128];
int cc;
- if (test_facility(2) && test_facility(12)) {
+ if (test_facility(12)) {
cc = kvm_s390_query_ap_config(config);
if (cc)
@@ -1048,16 +1087,12 @@ static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
cpu_id->version = 0xff;
}
-static int kvm_s390_crypto_init(struct kvm *kvm)
+static void kvm_s390_crypto_init(struct kvm *kvm)
{
if (!test_kvm_facility(kvm, 76))
- return 0;
-
- kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
- GFP_KERNEL | GFP_DMA);
- if (!kvm->arch.crypto.crycb)
- return -ENOMEM;
+ return;
+ kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
kvm_s390_set_crycb_format(kvm);
/* Enable AES/DEA protected key functions by default */
@@ -1067,8 +1102,15 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+}
- return 0;
+static void sca_dispose(struct kvm *kvm)
+{
+ if (kvm->arch.use_esca)
+ free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
+ else
+ free_page((unsigned long)(kvm->arch.sca));
+ kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
@@ -1094,14 +1136,17 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
rc = -ENOMEM;
- kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
+ kvm->arch.use_esca = 0; /* start with basic SCA */
+ rwlock_init(&kvm->arch.sca_lock);
+ kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
if (!kvm->arch.sca)
goto out_err;
spin_lock(&kvm_lock);
sca_offset += 16;
- if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
+ if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
sca_offset = 0;
- kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
+ kvm->arch.sca = (struct bsca_block *)
+ ((char *) kvm->arch.sca + sca_offset);
spin_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);
@@ -1110,37 +1155,30 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.dbf)
goto out_err;
- /*
- * The architectural maximum amount of facilities is 16 kbit. To store
- * this amount, 2 kbyte of memory is required. Thus we need a full
- * page to hold the guest facility list (arch.model.fac->list) and the
- * facility mask (arch.model.fac->mask). Its address size has to be
- * 31 bits and word aligned.
- */
- kvm->arch.model.fac =
- (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
- if (!kvm->arch.model.fac)
+ kvm->arch.sie_page2 =
+ (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ if (!kvm->arch.sie_page2)
goto out_err;
/* Populate the facility mask initially. */
- memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
+ memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
if (i < kvm_s390_fac_list_mask_size())
- kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
+ kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
else
- kvm->arch.model.fac->mask[i] = 0UL;
+ kvm->arch.model.fac_mask[i] = 0UL;
}
/* Populate the facility list initially. */
- memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
+ kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
+ memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
- if (kvm_s390_crypto_init(kvm) < 0)
- goto out_err;
+ kvm_s390_crypto_init(kvm);
spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
@@ -1153,8 +1191,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (type & KVM_VM_S390_UCONTROL) {
kvm->arch.gmap = NULL;
+ kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
} else {
- kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
+ if (sclp.hamax == U64_MAX)
+ kvm->arch.mem_limit = TASK_MAX_SIZE;
+ else
+ kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
+ sclp.hamax + 1);
+ kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
if (!kvm->arch.gmap)
goto out_err;
kvm->arch.gmap->private = kvm;
@@ -1166,14 +1210,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.epoch = 0;
spin_lock_init(&kvm->arch.start_stop_lock);
- KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
+ KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
out_err:
- kfree(kvm->arch.crypto.crycb);
- free_page((unsigned long)kvm->arch.model.fac);
+ free_page((unsigned long)kvm->arch.sie_page2);
debug_unregister(kvm->arch.dbf);
- free_page((unsigned long)(kvm->arch.sca));
+ sca_dispose(kvm);
KVM_EVENT(3, "creation of vm failed: %d", rc);
return rc;
}
@@ -1184,14 +1227,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_s390_clear_local_irqs(vcpu);
kvm_clear_async_pf_completion_queue(vcpu);
- if (!kvm_is_ucontrol(vcpu->kvm)) {
- clear_bit(63 - vcpu->vcpu_id,
- (unsigned long *) &vcpu->kvm->arch.sca->mcn);
- if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
- (__u64) vcpu->arch.sie_block)
- vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
- }
- smp_mb();
+ if (!kvm_is_ucontrol(vcpu->kvm))
+ sca_del_vcpu(vcpu);
if (kvm_is_ucontrol(vcpu->kvm))
gmap_free(vcpu->arch.gmap);
@@ -1223,15 +1260,14 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
- free_page((unsigned long)kvm->arch.model.fac);
- free_page((unsigned long)(kvm->arch.sca));
+ sca_dispose(kvm);
debug_unregister(kvm->arch.dbf);
- kfree(kvm->arch.crypto.crycb);
+ free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
- KVM_EVENT(3, "vm 0x%p destroyed", kvm);
+ KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
@@ -1245,6 +1281,117 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
return 0;
}
+static void sca_del_vcpu(struct kvm_vcpu *vcpu)
+{
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ if (vcpu->kvm->arch.use_esca) {
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+
+ clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
+ sca->cpu[vcpu->vcpu_id].sda = 0;
+ } else {
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+
+ clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
+ sca->cpu[vcpu->vcpu_id].sda = 0;
+ }
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+}
+
+static void sca_add_vcpu(struct kvm_vcpu *vcpu)
+{
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ if (vcpu->kvm->arch.use_esca) {
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+
+ sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
+ vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
+ vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
+ vcpu->arch.sie_block->ecb2 |= 0x04U;
+ set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
+ } else {
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+
+ sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
+ vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
+ vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+ set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
+ }
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+}
+
+/* Basic SCA to Extended SCA data copy routines */
+static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
+{
+ d->sda = s->sda;
+ d->sigp_ctrl.c = s->sigp_ctrl.c;
+ d->sigp_ctrl.scn = s->sigp_ctrl.scn;
+}
+
+static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
+{
+ int i;
+
+ d->ipte_control = s->ipte_control;
+ d->mcn[0] = s->mcn;
+ for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
+ sca_copy_entry(&d->cpu[i], &s->cpu[i]);
+}
+
+static int sca_switch_to_extended(struct kvm *kvm)
+{
+ struct bsca_block *old_sca = kvm->arch.sca;
+ struct esca_block *new_sca;
+ struct kvm_vcpu *vcpu;
+ unsigned int vcpu_idx;
+ u32 scaol, scaoh;
+
+ new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
+ if (!new_sca)
+ return -ENOMEM;
+
+ scaoh = (u32)((u64)(new_sca) >> 32);
+ scaol = (u32)(u64)(new_sca) & ~0x3fU;
+
+ kvm_s390_vcpu_block_all(kvm);
+ write_lock(&kvm->arch.sca_lock);
+
+ sca_copy_b_to_e(new_sca, old_sca);
+
+ kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
+ vcpu->arch.sie_block->scaoh = scaoh;
+ vcpu->arch.sie_block->scaol = scaol;
+ vcpu->arch.sie_block->ecb2 |= 0x04U;
+ }
+ kvm->arch.sca = new_sca;
+ kvm->arch.use_esca = 1;
+
+ write_unlock(&kvm->arch.sca_lock);
+ kvm_s390_vcpu_unblock_all(kvm);
+
+ free_page((unsigned long)old_sca);
+
+ VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
+ old_sca, kvm->arch.sca);
+ return 0;
+}
+
+static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
+{
+ int rc;
+
+ if (id < KVM_S390_BSCA_CPU_SLOTS)
+ return true;
+ if (!sclp.has_esca)
+ return false;
+
+ mutex_lock(&kvm->lock);
+ rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
+ mutex_unlock(&kvm->lock);
+
+ return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
+}
+
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1255,8 +1402,15 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
KVM_SYNC_CRS |
KVM_SYNC_ARCH0 |
KVM_SYNC_PFAULT;
- if (test_kvm_facility(vcpu->kvm, 129))
+ if (test_kvm_facility(vcpu->kvm, 64))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
+ /* fprs can be synchronized via vrs, even if the guest has no vx. With
+ * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
+ */
+ if (MACHINE_HAS_VX)
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
+ else
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
if (kvm_is_ucontrol(vcpu->kvm))
return __kvm_ucontrol_vcpu_init(vcpu);
@@ -1264,44 +1418,105 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
return 0;
}
-/*
- * Backs up the current FP/VX register save area on a particular
- * destination. Used to switch between different register save
- * areas.
- */
-static inline void save_fpu_to(struct fpu *dst)
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
- dst->fpc = current->thread.fpu.fpc;
- dst->regs = current->thread.fpu.regs;
+ WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
+ raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+ vcpu->arch.cputm_start = get_tod_clock_fast();
+ raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
-/*
- * Switches the FP/VX register save area from which to lazy
- * restore register contents.
- */
-static inline void load_fpu_from(struct fpu *from)
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
- current->thread.fpu.fpc = from->fpc;
- current->thread.fpu.regs = from->regs;
+ WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
+ raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+ vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
+ vcpu->arch.cputm_start = 0;
+ raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
-void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
- /* Save host register state */
- save_fpu_regs();
- save_fpu_to(&vcpu->arch.host_fpregs);
+ WARN_ON_ONCE(vcpu->arch.cputm_enabled);
+ vcpu->arch.cputm_enabled = true;
+ __start_cpu_timer_accounting(vcpu);
+}
- if (test_kvm_facility(vcpu->kvm, 129)) {
- current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
+ __stop_cpu_timer_accounting(vcpu);
+ vcpu->arch.cputm_enabled = false;
+}
+
+static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ __enable_cpu_timer_accounting(vcpu);
+ preempt_enable();
+}
+
+static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ __disable_cpu_timer_accounting(vcpu);
+ preempt_enable();
+}
+
+/* set the cpu timer - may only be called from the VCPU thread itself */
+void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
+{
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+ if (vcpu->arch.cputm_enabled)
+ vcpu->arch.cputm_start = get_tod_clock_fast();
+ vcpu->arch.sie_block->cputm = cputm;
+ raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+ preempt_enable();
+}
+
+/* update and get the cpu timer - can also be called from other VCPU threads */
+__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
+{
+ unsigned int seq;
+ __u64 value;
+
+ if (unlikely(!vcpu->arch.cputm_enabled))
+ return vcpu->arch.sie_block->cputm;
+
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ do {
+ seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
/*
- * Use the register save area in the SIE-control block
- * for register restore and save in kvm_arch_vcpu_put()
+ * If the writer would ever execute a read in the critical
+ * section, e.g. in irq context, we have a deadlock.
*/
- current->thread.fpu.vxrs =
- (__vector128 *)&vcpu->run->s.regs.vrs;
- } else
- load_fpu_from(&vcpu->arch.guest_fpregs);
+ WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
+ value = vcpu->arch.sie_block->cputm;
+ /* if cputm_start is 0, accounting is being started/stopped */
+ if (likely(vcpu->arch.cputm_start))
+ value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
+ } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
+ preempt_enable();
+ return value;
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ /* Save host register state */
+ save_fpu_regs();
+ vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+ vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
+ if (MACHINE_HAS_VX)
+ current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+ else
+ current->thread.fpu.regs = vcpu->run->s.regs.fprs;
+ current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
current->thread.fpu.fpc = 0;
@@ -1310,26 +1525,26 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.gmap);
atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
+ __start_cpu_timer_accounting(vcpu);
+ vcpu->cpu = cpu;
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ vcpu->cpu = -1;
+ if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
+ __stop_cpu_timer_accounting(vcpu);
atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);
+ /* Save guest register state */
save_fpu_regs();
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
- if (test_kvm_facility(vcpu->kvm, 129))
- /*
- * kvm_arch_vcpu_load() set up the register save area to
- * the &vcpu->run->s.regs.vrs and, thus, the vector registers
- * are already saved. Only the floating-point control must be
- * copied.
- */
- vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
- else
- save_fpu_to(&vcpu->arch.guest_fpregs);
- load_fpu_from(&vcpu->arch.host_fpregs);
+ /* Restore host register state */
+ current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+ current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
@@ -1341,14 +1556,15 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gpsw.mask = 0UL;
vcpu->arch.sie_block->gpsw.addr = 0UL;
kvm_s390_set_prefix(vcpu, 0);
- vcpu->arch.sie_block->cputm = 0UL;
+ kvm_s390_set_cpu_timer(vcpu, 0);
vcpu->arch.sie_block->ckc = 0UL;
vcpu->arch.sie_block->todpr = 0;
memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
vcpu->arch.sie_block->gcr[0] = 0xE0UL;
vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
- vcpu->arch.guest_fpregs.fpc = 0;
- asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+ /* make sure the new fpc will be lazily loaded */
+ save_fpu_regs();
+ current->thread.fpu.fpc = 0;
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1365,8 +1581,11 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
preempt_enable();
mutex_unlock(&vcpu->kvm->lock);
- if (!kvm_is_ucontrol(vcpu->kvm))
+ if (!kvm_is_ucontrol(vcpu->kvm)) {
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
+ sca_add_vcpu(vcpu);
+ }
+
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
@@ -1407,7 +1626,8 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
vcpu->arch.cpu_id = model->cpu_id;
vcpu->arch.sie_block->ibc = model->ibc;
- vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
+ if (test_kvm_facility(vcpu->kvm, 7))
+ vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -1435,10 +1655,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->eca |= 1;
if (sclp.has_sigpif)
vcpu->arch.sie_block->eca |= 0x10000000U;
+ if (test_kvm_facility(vcpu->kvm, 64))
+ vcpu->arch.sie_block->ecb3 |= 0x01;
if (test_kvm_facility(vcpu->kvm, 129)) {
vcpu->arch.sie_block->eca |= 0x00020000;
vcpu->arch.sie_block->ecd |= 0x20000000;
}
+ vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
if (vcpu->kvm->arch.use_cmma) {
@@ -1461,7 +1684,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
struct sie_page *sie_page;
int rc = -EINVAL;
- if (id >= KVM_MAX_VCPUS)
+ if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
goto out;
rc = -ENOMEM;
@@ -1478,42 +1701,16 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
vcpu->arch.sie_block->icpua = id;
- if (!kvm_is_ucontrol(kvm)) {
- if (!kvm->arch.sca) {
- WARN_ON_ONCE(1);
- goto out_free_cpu;
- }
- if (!kvm->arch.sca->cpu[id].sda)
- kvm->arch.sca->cpu[id].sda =
- (__u64) vcpu->arch.sie_block;
- vcpu->arch.sie_block->scaoh =
- (__u32)(((__u64)kvm->arch.sca) >> 32);
- vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
- set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
- }
-
spin_lock_init(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
-
- /*
- * Allocate a save area for floating-point registers. If the vector
- * extension is available, register contents are saved in the SIE
- * control block. The allocated save area is still required in
- * particular places, for example, in kvm_s390_vcpu_store_status().
- */
- vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
- GFP_KERNEL);
- if (!vcpu->arch.guest_fpregs.fprs) {
- rc = -ENOMEM;
- goto out_free_sie_block;
- }
+ seqcount_init(&vcpu->arch.cputm_seqcount);
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
goto out_free_sie_block;
- VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
+ VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
@@ -1608,7 +1805,7 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CPU_TIMER:
- r = put_user(vcpu->arch.sie_block->cputm,
+ r = put_user(kvm_s390_get_cpu_timer(vcpu),
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CLOCK_COMP:
@@ -1646,6 +1843,7 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
struct kvm_one_reg *reg)
{
int r = -EINVAL;
+ __u64 val;
switch (reg->id) {
case KVM_REG_S390_TODPR:
@@ -1657,8 +1855,9 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CPU_TIMER:
- r = get_user(vcpu->arch.sie_block->cputm,
- (u64 __user *)reg->addr);
+ r = get_user(val, (u64 __user *)reg->addr);
+ if (!r)
+ kvm_s390_set_cpu_timer(vcpu, val);
break;
case KVM_REG_S390_CLOCK_COMP:
r = get_user(vcpu->arch.sie_block->ckc,
@@ -1730,19 +1929,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
+ /* make sure the new values will be lazily loaded */
+ save_fpu_regs();
if (test_fp_ctl(fpu->fpc))
return -EINVAL;
- memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
- vcpu->arch.guest_fpregs.fpc = fpu->fpc;
- save_fpu_regs();
- load_fpu_from(&vcpu->arch.guest_fpregs);
+ current->thread.fpu.fpc = fpu->fpc;
+ if (MACHINE_HAS_VX)
+ convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+ else
+ memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
- fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+ /* make sure we have the latest values */
+ save_fpu_regs();
+ if (MACHINE_HAS_VX)
+ convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+ else
+ memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+ fpu->fpc = current->thread.fpu.fpc;
return 0;
}
@@ -2009,7 +2216,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
*/
kvm_check_async_pf_completion(vcpu);
- memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
+ vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
+ vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
if (need_resched())
schedule();
@@ -2042,8 +2250,10 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
- u8 opcode;
+ struct kvm_s390_pgm_info pgm_info = {
+ .code = PGM_ADDRESSING,
+ };
+ u8 opcode, ilen;
int rc;
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
@@ -2057,18 +2267,25 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
* to look up the current opcode to get the length of the instruction
* to be able to forward the PSW.
*/
- rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
- if (rc)
- return kvm_s390_inject_prog_cond(vcpu, rc);
- psw->addr = __rewind_psw(*psw, -insn_length(opcode));
-
- return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ rc = read_guest_instr(vcpu, &opcode, 1);
+ ilen = insn_length(opcode);
+ if (rc < 0) {
+ return rc;
+ } else if (rc) {
+ /* Instruction-Fetching Exceptions - we can't detect the ilen.
+ * Forward by arbitrary ilc, injection will take care of
+ * nullification if necessary.
+ */
+ pgm_info = vcpu->arch.pgm;
+ ilen = 4;
+ }
+ pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
+ kvm_s390_forward_psw(vcpu, ilen);
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
- int rc = -1;
-
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
@@ -2076,40 +2293,36 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
if (guestdbg_enabled(vcpu))
kvm_s390_restore_guest_per_regs(vcpu);
- if (exit_reason >= 0) {
- rc = 0;
+ vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
+ vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
+
+ if (vcpu->arch.sie_block->icptcode > 0) {
+ int rc = kvm_handle_sie_intercept(vcpu);
+
+ if (rc != -EOPNOTSUPP)
+ return rc;
+ vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
+ vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
+ vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
+ vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
+ return -EREMOTE;
+ } else if (exit_reason != -EFAULT) {
+ vcpu->stat.exit_null++;
+ return 0;
} else if (kvm_is_ucontrol(vcpu->kvm)) {
vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
vcpu->run->s390_ucontrol.trans_exc_code =
current->thread.gmap_addr;
vcpu->run->s390_ucontrol.pgm_code = 0x10;
- rc = -EREMOTE;
-
+ return -EREMOTE;
} else if (current->thread.gmap_pfault) {
trace_kvm_s390_major_guest_pfault(vcpu);
current->thread.gmap_pfault = 0;
- if (kvm_arch_setup_async_pf(vcpu)) {
- rc = 0;
- } else {
- gpa_t gpa = current->thread.gmap_addr;
- rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
- }
+ if (kvm_arch_setup_async_pf(vcpu))
+ return 0;
+ return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
}
-
- if (rc == -1)
- rc = vcpu_post_run_fault_in_sie(vcpu);
-
- memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
-
- if (rc == 0) {
- if (kvm_is_ucontrol(vcpu->kvm))
- /* Don't exit for host interrupts. */
- rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
- else
- rc = kvm_handle_sie_intercept(vcpu);
- }
-
- return rc;
+ return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
@@ -2134,10 +2347,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
*/
local_irq_disable();
__kvm_guest_enter();
+ __disable_cpu_timer_accounting(vcpu);
local_irq_enable();
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
local_irq_disable();
+ __enable_cpu_timer_accounting(vcpu);
__kvm_guest_exit();
local_irq_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -2161,7 +2376,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
- vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
+ kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
@@ -2183,7 +2398,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
- kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
+ kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
@@ -2215,6 +2430,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
sync_regs(vcpu, kvm_run);
+ enable_cpu_timer_accounting(vcpu);
might_fault();
rc = __vcpu_run(vcpu);
@@ -2229,21 +2445,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rc = 0;
}
- if (rc == -EOPNOTSUPP) {
- /* intercept cannot be handled in-kernel, prepare kvm-run */
- kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
- kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
- kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
- kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
- rc = 0;
- }
-
if (rc == -EREMOTE) {
- /* intercept was handled, but userspace support is needed
- * kvm_run has been prepared by the handler */
+ /* userspace support is needed, kvm_run has been prepared */
rc = 0;
}
+ disable_cpu_timer_accounting(vcpu);
store_regs(vcpu, kvm_run);
if (vcpu->sigset_active)
@@ -2262,41 +2469,51 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
unsigned char archmode = 1;
+ freg_t fprs[NUM_FPRS];
unsigned int px;
- u64 clkcomp;
+ u64 clkcomp, cputm;
int rc;
+ px = kvm_s390_get_prefix(vcpu);
if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
if (write_guest_abs(vcpu, 163, &archmode, 1))
return -EFAULT;
- gpa = SAVE_AREA_BASE;
+ gpa = 0;
} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
if (write_guest_real(vcpu, 163, &archmode, 1))
return -EFAULT;
- gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
+ gpa = px;
+ } else
+ gpa -= __LC_FPREGS_SAVE_AREA;
+
+ /* manually convert vector registers if necessary */
+ if (MACHINE_HAS_VX) {
+ convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
+ rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+ fprs, 128);
+ } else {
+ rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+ vcpu->run->s.regs.fprs, 128);
}
- rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
- vcpu->arch.guest_fpregs.fprs, 128);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
+ rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
vcpu->run->s.regs.gprs, 128);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
+ rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
&vcpu->arch.sie_block->gpsw, 16);
- px = kvm_s390_get_prefix(vcpu);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
+ rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
&px, 4);
- rc |= write_guest_abs(vcpu,
- gpa + offsetof(struct save_area, fp_ctrl_reg),
- &vcpu->arch.guest_fpregs.fpc, 4);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
+ rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
+ &vcpu->run->s.regs.fpc, 4);
+ rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
&vcpu->arch.sie_block->todpr, 4);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
- &vcpu->arch.sie_block->cputm, 8);
+ cputm = kvm_s390_get_cpu_timer(vcpu);
+ rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
+ &cputm, 8);
clkcomp = vcpu->arch.sie_block->ckc >> 8;
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
+ rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
&clkcomp, 8);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
+ rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
&vcpu->run->s.regs.acrs, 64);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
+ rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
&vcpu->arch.sie_block->gcr, 128);
return rc ? -EFAULT : 0;
}
@@ -2309,19 +2526,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
* it into the save area
*/
save_fpu_regs();
- if (test_kvm_facility(vcpu->kvm, 129)) {
- /*
- * If the vector extension is available, the vector registers
- * which overlaps with floating-point registers are saved in
- * the SIE-control block. Hence, extract the floating-point
- * registers and the FPC value and store them in the
- * guest_fpregs structure.
- */
- vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
- convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
- current->thread.fpu.vxrs);
- } else
- save_fpu_to(&vcpu->arch.guest_fpregs);
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
save_access_regs(vcpu->run->s.regs.acrs);
return kvm_s390_store_status_unloaded(vcpu, addr);
@@ -2508,7 +2713,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar,
+ mop->size, GACC_FETCH);
break;
}
r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
@@ -2519,7 +2725,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
break;
case KVM_S390_MEMOP_LOGICAL_WRITE:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar,
+ mop->size, GACC_STORE);
break;
}
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
@@ -2732,6 +2939,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (mem->memory_size & 0xffffful)
return -EINVAL;
+ if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
+ return -EINVAL;
+
return 0;
}
@@ -2763,6 +2973,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
static int __init kvm_s390_init(void)
{
+ if (!sclp.has_sief2) {
+ pr_info("SIE not available\n");
+ return -ENODEV;
+ }
+
return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 1e70e00d3c5e..8621ab00ec8e 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -19,6 +19,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/facility.h>
+#include <asm/processor.h>
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
@@ -53,6 +54,11 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
}
+static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_WAIT;
+}
+
static inline int kvm_is_ucontrol(struct kvm *kvm)
{
#ifdef CONFIG_KVM_S390_UCONTROL
@@ -154,8 +160,8 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
/* test availability of facility in a kvm instance */
static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
{
- return __test_facility(nr, kvm->arch.model.fac->mask) &&
- __test_facility(nr, kvm->arch.model.fac->list);
+ return __test_facility(nr, kvm->arch.model.fac_mask) &&
+ __test_facility(nr, kvm->arch.model.fac_list);
}
static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
@@ -212,8 +218,22 @@ int kvm_s390_reinject_io_int(struct kvm *kvm,
int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
/* implemented in intercept.c */
-void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc);
+u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu);
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+static inline void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilen)
+{
+ struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+
+ sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilen);
+}
+static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen)
+{
+ kvm_s390_rewind_psw(vcpu, -ilen);
+}
+static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
+{
+ kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu));
+}
/* implemented in priv.c */
int is_valid_psw(psw_t *psw);
@@ -248,6 +268,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
unsigned long kvm_s390_fac_list_mask_size(void);
extern unsigned long kvm_s390_fac_list_mask[];
+void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
+__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
@@ -340,4 +362,11 @@ void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
+/* support for Basic/Extended SCA handling */
+static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm)
+{
+ struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */
+
+ return &sca->ipte_control;
+}
#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 77191b85ea7a..0a1591d3d25d 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -23,6 +23,7 @@
#include <asm/sysinfo.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
+#include <asm/gmap.h>
#include <asm/io.h>
#include <asm/ptrace.h>
#include <asm/compat.h>
@@ -173,7 +174,7 @@ static int handle_skey(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- kvm_s390_rewind_psw(vcpu, 4);
+ kvm_s390_retry_instr(vcpu);
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
return 0;
}
@@ -184,7 +185,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
if (psw_bits(vcpu->arch.sie_block->gpsw).p)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
- kvm_s390_rewind_psw(vcpu, 4);
+ kvm_s390_retry_instr(vcpu);
VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
return 0;
}
@@ -354,8 +355,8 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
* We need to shift the lower 32 facility bits (bit 0-31) from a u64
* into a u32 memory representation. They will remain bits 0-31.
*/
- fac = *vcpu->kvm->arch.model.fac->list >> 32;
- rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
+ fac = *vcpu->kvm->arch.model.fac_list >> 32;
+ rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list),
&fac, sizeof(fac));
if (rc)
return rc;
@@ -660,7 +661,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
- if (!MACHINE_HAS_PFMF)
+ if (!test_kvm_facility(vcpu->kvm, 8))
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -759,8 +760,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- /* Rewind PSW to repeat the ESSA instruction */
- kvm_s390_rewind_psw(vcpu, 4);
+ /* Retry the ESSA instruction */
+ kvm_s390_retry_instr(vcpu);
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
down_read(&gmap->mm->mmap_sem);
@@ -981,11 +982,12 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
ipte_lock(vcpu);
- ret = guest_translate_address(vcpu, address1, ar, &gpa, 1);
+ ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE);
if (ret == PGM_PROTECTION) {
/* Write protected? Try again with read-only... */
cc = 1;
- ret = guest_translate_address(vcpu, address1, ar, &gpa, 0);
+ ret = guest_translate_address(vcpu, address1, ar, &gpa,
+ GACC_FETCH);
}
if (ret) {
if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index da690b69f9fe..77c22d685c7a 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -291,12 +291,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
u16 cpu_addr, u32 parameter, u64 *status_reg)
{
int rc;
- struct kvm_vcpu *dst_vcpu;
+ struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
- if (cpu_addr >= KVM_MAX_VCPUS)
- return SIGP_CC_NOT_OPERATIONAL;
-
- dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
@@ -478,7 +474,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
if (order_code == SIGP_EXTERNAL_CALL) {
- dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
BUG_ON(dest_vcpu == NULL);
kvm_s390_vcpu_wakeup(dest_vcpu);
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index cc1d6c68356f..396485bca191 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -55,8 +55,8 @@ TRACE_EVENT(kvm_s390_create_vcpu,
__entry->sie_block = sie_block;
),
- TP_printk("create cpu %d at %p, sie block at %p", __entry->id,
- __entry->vcpu, __entry->sie_block)
+ TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK",
+ __entry->id, __entry->vcpu, __entry->sie_block)
);
TRACE_EVENT(kvm_s390_destroy_vcpu,
@@ -254,7 +254,7 @@ TRACE_EVENT(kvm_s390_enable_css,
__entry->kvm = kvm;
),
- TP_printk("enabling channel I/O support (kvm @ %p)\n",
+ TP_printk("enabling channel I/O support (kvm @ %pK)\n",
__entry->kvm)
);
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 0e8fefe5b0ce..1d1af31e8354 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,7 +3,7 @@
#
lib-y += delay.o string.o uaccess.o find.o
-obj-y += mem.o
+obj-y += mem.o xor.o
lib-$(CONFIG_SMP) += spinlock.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 427aa44b3505..d4549c964589 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -37,12 +37,22 @@ static inline void _raw_compare_and_delay(unsigned int *lock, unsigned int old)
asm(".insn rsy,0xeb0000000022,%0,0,%1" : : "d" (old), "Q" (*lock));
}
+static inline int cpu_is_preempted(int cpu)
+{
+ if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
+ return 0;
+ if (smp_vcpu_scheduled(cpu))
+ return 0;
+ return 1;
+}
+
void arch_spin_lock_wait(arch_spinlock_t *lp)
{
unsigned int cpu = SPINLOCK_LOCKVAL;
unsigned int owner;
- int count;
+ int count, first_diag;
+ first_diag = 1;
while (1) {
owner = ACCESS_ONCE(lp->lock);
/* Try to get the lock if it is free. */
@@ -51,9 +61,10 @@ void arch_spin_lock_wait(arch_spinlock_t *lp)
return;
continue;
}
- /* Check if the lock owner is running. */
- if (!smp_vcpu_scheduled(~owner)) {
+ /* First iteration: check if the lock owner is running. */
+ if (first_diag && cpu_is_preempted(~owner)) {
smp_yield_cpu(~owner);
+ first_diag = 0;
continue;
}
/* Loop for a while on the lock value. */
@@ -67,10 +78,13 @@ void arch_spin_lock_wait(arch_spinlock_t *lp)
continue;
/*
* For multiple layers of hypervisors, e.g. z/VM + LPAR
- * yield the CPU if the lock is still unavailable.
+ * yield the CPU unconditionally. For LPAR rely on the
+ * sense running status.
*/
- if (!MACHINE_IS_LPAR)
+ if (!MACHINE_IS_LPAR || cpu_is_preempted(~owner)) {
smp_yield_cpu(~owner);
+ first_diag = 0;
+ }
}
}
EXPORT_SYMBOL(arch_spin_lock_wait);
@@ -79,9 +93,10 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
{
unsigned int cpu = SPINLOCK_LOCKVAL;
unsigned int owner;
- int count;
+ int count, first_diag;
local_irq_restore(flags);
+ first_diag = 1;
while (1) {
owner = ACCESS_ONCE(lp->lock);
/* Try to get the lock if it is free. */
@@ -92,8 +107,9 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
local_irq_restore(flags);
}
/* Check if the lock owner is running. */
- if (!smp_vcpu_scheduled(~owner)) {
+ if (first_diag && cpu_is_preempted(~owner)) {
smp_yield_cpu(~owner);
+ first_diag = 0;
continue;
}
/* Loop for a while on the lock value. */
@@ -107,10 +123,13 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
continue;
/*
* For multiple layers of hypervisors, e.g. z/VM + LPAR
- * yield the CPU if the lock is still unavailable.
+ * yield the CPU unconditionally. For LPAR rely on the
+ * sense running status.
*/
- if (!MACHINE_IS_LPAR)
+ if (!MACHINE_IS_LPAR || cpu_is_preempted(~owner)) {
smp_yield_cpu(~owner);
+ first_diag = 0;
+ }
}
}
EXPORT_SYMBOL(arch_spin_lock_wait_flags);
@@ -145,7 +164,7 @@ void _raw_read_lock_wait(arch_rwlock_t *rw)
owner = 0;
while (1) {
if (count-- <= 0) {
- if (owner && !smp_vcpu_scheduled(~owner))
+ if (owner && cpu_is_preempted(~owner))
smp_yield_cpu(~owner);
count = spin_retry;
}
@@ -191,7 +210,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev)
owner = 0;
while (1) {
if (count-- <= 0) {
- if (owner && !smp_vcpu_scheduled(~owner))
+ if (owner && cpu_is_preempted(~owner))
smp_yield_cpu(~owner);
count = spin_retry;
}
@@ -221,7 +240,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw)
owner = 0;
while (1) {
if (count-- <= 0) {
- if (owner && !smp_vcpu_scheduled(~owner))
+ if (owner && cpu_is_preempted(~owner))
smp_yield_cpu(~owner);
count = spin_retry;
}
@@ -265,7 +284,7 @@ void arch_lock_relax(unsigned int cpu)
{
if (!cpu)
return;
- if (MACHINE_IS_LPAR && smp_vcpu_scheduled(~cpu))
+ if (MACHINE_IS_LPAR && !cpu_is_preempted(~cpu))
return;
smp_yield_cpu(~cpu);
}
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
new file mode 100644
index 000000000000..7d94e3ec34a9
--- /dev/null
+++ b/arch/s390/lib/xor.c
@@ -0,0 +1,134 @@
+/*
+ * Optimized xor_block operation for RAID4/5
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/raid/xor.h>
+
+static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ "3:\n"
+ : : "d" (bytes), "a" (p1), "a" (p2)
+ : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " xc 0(256,%1),0(%3)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " la %3,256(%3)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " ex %0,6(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ " xc 0(1,%1),0(%3)\n"
+ "3:\n"
+ : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
+ : : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " xc 0(256,%1),0(%3)\n"
+ " xc 0(256,%1),0(%4)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " la %3,256(%3)\n"
+ " la %4,256(%4)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " ex %0,6(1)\n"
+ " ex %0,12(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ " xc 0(1,%1),0(%3)\n"
+ " xc 0(1,%1),0(%4)\n"
+ "3:\n"
+ : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
+ : : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ /* Get around a gcc oddity */
+ register unsigned long *reg7 asm ("7") = p5;
+
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " xc 0(256,%1),0(%3)\n"
+ " xc 0(256,%1),0(%4)\n"
+ " xc 0(256,%1),0(%5)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " la %3,256(%3)\n"
+ " la %4,256(%4)\n"
+ " la %5,256(%5)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " ex %0,6(1)\n"
+ " ex %0,12(1)\n"
+ " ex %0,18(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ " xc 0(1,%1),0(%3)\n"
+ " xc 0(1,%1),0(%4)\n"
+ " xc 0(1,%1),0(%5)\n"
+ "3:\n"
+ : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
+ "+a" (reg7)
+ : : "0", "1", "cc", "memory");
+}
+
+struct xor_block_template xor_block_xc = {
+ .name = "xc",
+ .do_2 = xor_xc_2,
+ .do_3 = xor_xc_3,
+ .do_4 = xor_xc_4,
+ .do_5 = xor_xc_5,
+};
+EXPORT_SYMBOL(xor_block_xc);
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 839592ca265c..0aa0ad165d8b 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,9 +2,11 @@
# Makefile for the linux s390-specific parts of the memory manager.
#
-obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o
-obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o
+obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o
+obj-y += page-states.o gup.o pageattr.o mem_detect.o
+obj-y += pgtable.o pgalloc.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o
+obj-$(CONFIG_PGSTE) += gmap.o
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
deleted file mode 100644
index 4d1ee88864e8..000000000000
--- a/arch/s390/mm/extable.c
+++ /dev/null
@@ -1,81 +0,0 @@
-#include <linux/module.h>
-#include <linux/sort.h>
-#include <asm/uaccess.h>
-
-/*
- * Search one exception table for an entry corresponding to the
- * given instruction address, and return the address of the entry,
- * or NULL if none is found.
- * We use a binary search, and thus we assume that the table is
- * already sorted.
- */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
- const struct exception_table_entry *last,
- unsigned long value)
-{
- const struct exception_table_entry *mid;
- unsigned long addr;
-
- while (first <= last) {
- mid = ((last - first) >> 1) + first;
- addr = extable_insn(mid);
- if (addr < value)
- first = mid + 1;
- else if (addr > value)
- last = mid - 1;
- else
- return mid;
- }
- return NULL;
-}
-
-/*
- * The exception table needs to be sorted so that the binary
- * search that we use to find entries in it works properly.
- * This is used both for the kernel exception table and for
- * the exception tables of modules that get loaded.
- *
- */
-static int cmp_ex(const void *a, const void *b)
-{
- const struct exception_table_entry *x = a, *y = b;
-
- /* This compare is only valid after normalization. */
- return x->insn - y->insn;
-}
-
-void sort_extable(struct exception_table_entry *start,
- struct exception_table_entry *finish)
-{
- struct exception_table_entry *p;
- int i;
-
- /* Normalize entries to being relative to the start of the section */
- for (p = start, i = 0; p < finish; p++, i += 8)
- p->insn += i;
- sort(start, finish - start, sizeof(*start), cmp_ex, NULL);
- /* Denormalize all entries */
- for (p = start, i = 0; p < finish; p++, i += 8)
- p->insn -= i;
-}
-
-#ifdef CONFIG_MODULES
-/*
- * If the exception table is sorted, any referring to the module init
- * will be at the beginning or the end.
- */
-void trim_init_extable(struct module *m)
-{
- /* Trim the beginning */
- while (m->num_exentries &&
- within_module_init(extable_insn(&m->extable[0]), m)) {
- m->extable++;
- m->num_exentries--;
- }
- /* Trim the end */
- while (m->num_exentries &&
- within_module_init(extable_insn(&m->extable[m->num_exentries-1]), m))
- m->num_exentries--;
-}
-#endif /* CONFIG_MODULES */
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 18fccc303db7..02042b6b66bf 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -94,7 +94,7 @@ static DEFINE_MUTEX(dcss_lock);
static LIST_HEAD(dcss_list);
static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC",
"EW/EN-MIXED" };
-static int loadshr_scode, loadnsr_scode, findseg_scode;
+static int loadshr_scode, loadnsr_scode;
static int segext_scode, purgeseg_scode;
static int scode_set;
@@ -130,7 +130,6 @@ dcss_set_subcodes(void)
loadshr_scode = DCSS_LOADSHRX;
loadnsr_scode = DCSS_LOADNSRX;
purgeseg_scode = DCSS_PURGESEG;
- findseg_scode = DCSS_FINDSEGX;
segext_scode = DCSS_SEGEXTX;
return 0;
}
@@ -138,7 +137,6 @@ dcss_set_subcodes(void)
loadshr_scode = DCSS_LOADNOLY;
loadnsr_scode = DCSS_LOADNSR;
purgeseg_scode = DCSS_PURGESEG;
- findseg_scode = DCSS_FINDSEG;
segext_scode = DCSS_SEGEXT;
return 0;
}
@@ -267,7 +265,7 @@ query_segment_type (struct dcss_segment *seg)
goto out_free;
}
if (diag_cc > 1) {
- pr_warning("Querying a DCSS type failed with rc=%ld\n", vmrc);
+ pr_warn("Querying a DCSS type failed with rc=%ld\n", vmrc);
rc = dcss_diag_translate_rc (vmrc);
goto out_free;
}
@@ -459,8 +457,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
goto out_resource;
}
if (diag_cc > 1) {
- pr_warning("Loading DCSS %s failed with rc=%ld\n", name,
- end_addr);
+ pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr);
rc = dcss_diag_translate_rc(end_addr);
dcss_diag(&purgeseg_scode, seg->dcss_name,
&dummy, &dummy);
@@ -576,8 +573,7 @@ segment_modify_shared (char *name, int do_nonshared)
goto out_unlock;
}
if (atomic_read (&seg->ref_count) != 1) {
- pr_warning("DCSS %s is in use and cannot be reloaded\n",
- name);
+ pr_warn("DCSS %s is in use and cannot be reloaded\n", name);
rc = -EAGAIN;
goto out_unlock;
}
@@ -590,8 +586,8 @@ segment_modify_shared (char *name, int do_nonshared)
seg->res->flags |= IORESOURCE_READONLY;
if (request_resource(&iomem_resource, seg->res)) {
- pr_warning("DCSS %s overlaps with used memory resources "
- "and cannot be reloaded\n", name);
+ pr_warn("DCSS %s overlaps with used memory resources and cannot be reloaded\n",
+ name);
rc = -EBUSY;
kfree(seg->res);
goto out_del_mem;
@@ -609,8 +605,8 @@ segment_modify_shared (char *name, int do_nonshared)
goto out_del_res;
}
if (diag_cc > 1) {
- pr_warning("Reloading DCSS %s failed with rc=%ld\n", name,
- end_addr);
+ pr_warn("Reloading DCSS %s failed with rc=%ld\n",
+ name, end_addr);
rc = dcss_diag_translate_rc(end_addr);
goto out_del_res;
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ec1a30d0d11a..cce577feab1e 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -32,6 +32,7 @@
#include <asm/asm-offsets.h>
#include <asm/diag.h>
#include <asm/pgtable.h>
+#include <asm/gmap.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/facility.h>
@@ -183,6 +184,8 @@ static void dump_fault_info(struct pt_regs *regs)
{
unsigned long asce;
+ pr_alert("Failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
pr_alert("Fault in ");
switch (regs->int_parm_long & 3) {
case 3:
@@ -218,7 +221,9 @@ static void dump_fault_info(struct pt_regs *regs)
dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
}
-static inline void report_user_fault(struct pt_regs *regs, long signr)
+int show_unhandled_signals = 1;
+
+void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
{
if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
return;
@@ -228,11 +233,10 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
return;
printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
regs->int_code & 0xffff, regs->int_code >> 17);
- print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
+ print_vma_addr(KERN_CONT "in ", regs->psw.addr);
printk(KERN_CONT "\n");
- printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
- regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
- dump_fault_info(regs);
+ if (is_mm_fault)
+ dump_fault_info(regs);
show_regs(regs);
}
@@ -244,7 +248,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
{
struct siginfo si;
- report_user_fault(regs, SIGSEGV);
+ report_user_fault(regs, SIGSEGV, 1);
si.si_signo = SIGSEGV;
si.si_code = si_code;
si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
@@ -254,12 +258,11 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
static noinline void do_no_context(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
- unsigned long address;
/* Are we prepared to handle this kernel fault? */
- fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+ fixup = search_exception_tables(regs->psw.addr);
if (fixup) {
- regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+ regs->psw.addr = extable_fixup(fixup);
return;
}
@@ -267,15 +270,12 @@ static noinline void do_no_context(struct pt_regs *regs)
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
- address = regs->int_parm_long & __FAIL_ADDR_MASK;
if (!user_space_fault(regs))
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
" in virtual kernel address space\n");
else
printk(KERN_ALERT "Unable to handle kernel paging request"
" in virtual user address space\n");
- printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
- regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
dump_fault_info(regs);
die(regs, "Oops");
do_exit(SIGKILL);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
new file mode 100644
index 000000000000..cace818d86eb
--- /dev/null
+++ b/arch/s390/mm/gmap.c
@@ -0,0 +1,774 @@
+/*
+ * KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/swapops.h>
+#include <linux/ksm.h>
+#include <linux/mman.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/tlb.h>
+
+/**
+ * gmap_alloc - allocate a guest address space
+ * @mm: pointer to the parent mm_struct
+ * @limit: maximum address of the gmap address space
+ *
+ * Returns a guest address space structure.
+ */
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
+{
+ struct gmap *gmap;
+ struct page *page;
+ unsigned long *table;
+ unsigned long etype, atype;
+
+ if (limit < (1UL << 31)) {
+ limit = (1UL << 31) - 1;
+ atype = _ASCE_TYPE_SEGMENT;
+ etype = _SEGMENT_ENTRY_EMPTY;
+ } else if (limit < (1UL << 42)) {
+ limit = (1UL << 42) - 1;
+ atype = _ASCE_TYPE_REGION3;
+ etype = _REGION3_ENTRY_EMPTY;
+ } else if (limit < (1UL << 53)) {
+ limit = (1UL << 53) - 1;
+ atype = _ASCE_TYPE_REGION2;
+ etype = _REGION2_ENTRY_EMPTY;
+ } else {
+ limit = -1UL;
+ atype = _ASCE_TYPE_REGION1;
+ etype = _REGION1_ENTRY_EMPTY;
+ }
+ gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
+ if (!gmap)
+ goto out;
+ INIT_LIST_HEAD(&gmap->crst_list);
+ INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
+ INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
+ spin_lock_init(&gmap->guest_table_lock);
+ gmap->mm = mm;
+ page = alloc_pages(GFP_KERNEL, 2);
+ if (!page)
+ goto out_free;
+ page->index = 0;
+ list_add(&page->lru, &gmap->crst_list);
+ table = (unsigned long *) page_to_phys(page);
+ crst_table_init(table, etype);
+ gmap->table = table;
+ gmap->asce = atype | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | __pa(table);
+ gmap->asce_end = limit;
+ down_write(&mm->mmap_sem);
+ list_add(&gmap->list, &mm->context.gmap_list);
+ up_write(&mm->mmap_sem);
+ return gmap;
+
+out_free:
+ kfree(gmap);
+out:
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(gmap_alloc);
+
+static void gmap_flush_tlb(struct gmap *gmap)
+{
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_asce(gmap->mm, gmap->asce);
+ else
+ __tlb_flush_global();
+}
+
+static void gmap_radix_tree_free(struct radix_tree_root *root)
+{
+ struct radix_tree_iter iter;
+ unsigned long indices[16];
+ unsigned long index;
+ void **slot;
+ int i, nr;
+
+ /* A radix tree is freed by deleting all of its entries */
+ index = 0;
+ do {
+ nr = 0;
+ radix_tree_for_each_slot(slot, root, &iter, index) {
+ indices[nr] = iter.index;
+ if (++nr == 16)
+ break;
+ }
+ for (i = 0; i < nr; i++) {
+ index = indices[i];
+ radix_tree_delete(root, index);
+ }
+ } while (nr > 0);
+}
+
+/**
+ * gmap_free - free a guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_free(struct gmap *gmap)
+{
+ struct page *page, *next;
+
+ /* Flush tlb. */
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_asce(gmap->mm, gmap->asce);
+ else
+ __tlb_flush_global();
+
+ /* Free all segment & region tables. */
+ list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
+ __free_pages(page, 2);
+ gmap_radix_tree_free(&gmap->guest_to_host);
+ gmap_radix_tree_free(&gmap->host_to_guest);
+ down_write(&gmap->mm->mmap_sem);
+ list_del(&gmap->list);
+ up_write(&gmap->mm->mmap_sem);
+ kfree(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_free);
+
+/**
+ * gmap_enable - switch primary space to the guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_enable(struct gmap *gmap)
+{
+ S390_lowcore.gmap = (unsigned long) gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_enable);
+
+/**
+ * gmap_disable - switch back to the standard primary address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_disable(struct gmap *gmap)
+{
+ S390_lowcore.gmap = 0UL;
+}
+EXPORT_SYMBOL_GPL(gmap_disable);
+
+/*
+ * gmap_alloc_table is assumed to be called with mmap_sem held
+ */
+static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
+ unsigned long init, unsigned long gaddr)
+{
+ struct page *page;
+ unsigned long *new;
+
+ /* since we dont free the gmap table until gmap_free we can unlock */
+ page = alloc_pages(GFP_KERNEL, 2);
+ if (!page)
+ return -ENOMEM;
+ new = (unsigned long *) page_to_phys(page);
+ crst_table_init(new, init);
+ spin_lock(&gmap->mm->page_table_lock);
+ if (*table & _REGION_ENTRY_INVALID) {
+ list_add(&page->lru, &gmap->crst_list);
+ *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+ (*table & _REGION_ENTRY_TYPE_MASK);
+ page->index = gaddr;
+ page = NULL;
+ }
+ spin_unlock(&gmap->mm->page_table_lock);
+ if (page)
+ __free_pages(page, 2);
+ return 0;
+}
+
+/**
+ * __gmap_segment_gaddr - find virtual address from segment pointer
+ * @entry: pointer to a segment table entry in the guest address space
+ *
+ * Returns the virtual address in the guest address space for the segment
+ */
+static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+{
+ struct page *page;
+ unsigned long offset, mask;
+
+ offset = (unsigned long) entry / sizeof(unsigned long);
+ offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
+ mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
+ page = virt_to_page((void *)((unsigned long) entry & mask));
+ return page->index + offset;
+}
+
+/**
+ * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
+ * @gmap: pointer to the guest address space structure
+ * @vmaddr: address in the host process address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+{
+ unsigned long *entry;
+ int flush = 0;
+
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+ if (entry) {
+ flush = (*entry != _SEGMENT_ENTRY_INVALID);
+ *entry = _SEGMENT_ENTRY_INVALID;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ return flush;
+}
+
+/**
+ * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
+ * @gmap: pointer to the guest address space structure
+ * @gaddr: address in the guest address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+
+ vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+}
+
+/**
+ * gmap_unmap_segment - unmap segment from the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @to: address in the guest address space
+ * @len: length of the memory area to unmap
+ *
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
+ */
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+{
+ unsigned long off;
+ int flush;
+
+ if ((to | len) & (PMD_SIZE - 1))
+ return -EINVAL;
+ if (len == 0 || to + len < to)
+ return -EINVAL;
+
+ flush = 0;
+ down_write(&gmap->mm->mmap_sem);
+ for (off = 0; off < len; off += PMD_SIZE)
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ up_write(&gmap->mm->mmap_sem);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_unmap_segment);
+
+/**
+ * gmap_map_segment - map a segment to the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @from: source address in the parent address space
+ * @to: target address in the guest address space
+ * @len: length of the memory area to map
+ *
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
+ */
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+ unsigned long to, unsigned long len)
+{
+ unsigned long off;
+ int flush;
+
+ if ((from | to | len) & (PMD_SIZE - 1))
+ return -EINVAL;
+ if (len == 0 || from + len < from || to + len < to ||
+ from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
+ return -EINVAL;
+
+ flush = 0;
+ down_write(&gmap->mm->mmap_sem);
+ for (off = 0; off < len; off += PMD_SIZE) {
+ /* Remove old translation */
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ /* Store new translation */
+ if (radix_tree_insert(&gmap->guest_to_host,
+ (to + off) >> PMD_SHIFT,
+ (void *) from + off))
+ break;
+ }
+ up_write(&gmap->mm->mmap_sem);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ if (off >= len)
+ return 0;
+ gmap_unmap_segment(gmap, to, len);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(gmap_map_segment);
+
+/**
+ * __gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
+ return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
+}
+EXPORT_SYMBOL_GPL(__gmap_translate);
+
+/**
+ * gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ */
+unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long rc;
+
+ down_read(&gmap->mm->mmap_sem);
+ rc = __gmap_translate(gmap, gaddr);
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_translate);
+
+/**
+ * gmap_unlink - disconnect a page table from the gmap shadow tables
+ * @gmap: pointer to guest mapping meta data structure
+ * @table: pointer to the host page table
+ * @vmaddr: vm address associated with the host page table
+ */
+void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+ unsigned long vmaddr)
+{
+ struct gmap *gmap;
+ int flush;
+
+ list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+ flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ }
+}
+
+/**
+ * gmap_link - set up shadow page tables to connect a host to a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @vmaddr: vm address
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
+{
+ struct mm_struct *mm;
+ unsigned long *table;
+ spinlock_t *ptl;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ int rc;
+
+ /* Create higher level tables in the gmap page table */
+ table = gmap->table;
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
+ table += (gaddr >> 53) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
+ gaddr & 0xffe0000000000000UL))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
+ table += (gaddr >> 42) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
+ gaddr & 0xfffffc0000000000UL))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
+ table += (gaddr >> 31) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
+ gaddr & 0xffffffff80000000UL))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ table += (gaddr >> 20) & 0x7ff;
+ /* Walk the parent mm page table */
+ mm = gmap->mm;
+ pgd = pgd_offset(mm, vmaddr);
+ VM_BUG_ON(pgd_none(*pgd));
+ pud = pud_offset(pgd, vmaddr);
+ VM_BUG_ON(pud_none(*pud));
+ pmd = pmd_offset(pud, vmaddr);
+ VM_BUG_ON(pmd_none(*pmd));
+ /* large pmds cannot yet be handled */
+ if (pmd_large(*pmd))
+ return -EFAULT;
+ /* Link gmap segment table entry location to page table. */
+ rc = radix_tree_preload(GFP_KERNEL);
+ if (rc)
+ return rc;
+ ptl = pmd_lock(mm, pmd);
+ spin_lock(&gmap->guest_table_lock);
+ if (*table == _SEGMENT_ENTRY_INVALID) {
+ rc = radix_tree_insert(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT, table);
+ if (!rc)
+ *table = pmd_val(*pmd);
+ } else
+ rc = 0;
+ spin_unlock(&gmap->guest_table_lock);
+ spin_unlock(ptl);
+ radix_tree_preload_end();
+ return rc;
+}
+
+/**
+ * gmap_fault - resolve a fault on a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ */
+int gmap_fault(struct gmap *gmap, unsigned long gaddr,
+ unsigned int fault_flags)
+{
+ unsigned long vmaddr;
+ int rc;
+ bool unlocked;
+
+ down_read(&gmap->mm->mmap_sem);
+
+retry:
+ unlocked = false;
+ vmaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(vmaddr)) {
+ rc = vmaddr;
+ goto out_up;
+ }
+ if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
+ &unlocked)) {
+ rc = -EFAULT;
+ goto out_up;
+ }
+ /*
+ * In the case that fixup_user_fault unlocked the mmap_sem during
+ * faultin redo __gmap_translate to not race with a map/unmap_segment.
+ */
+ if (unlocked)
+ goto retry;
+
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+out_up:
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_fault);
+
+/*
+ * this function is assumed to be called with mmap_sem held
+ */
+void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (vmaddr) {
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Get pointer to the page table entry */
+ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
+ if (likely(ptep))
+ ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
+ pte_unmap_unlock(ptep, ptl);
+ }
+}
+EXPORT_SYMBOL_GPL(__gmap_zap);
+
+void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
+{
+ unsigned long gaddr, vmaddr, size;
+ struct vm_area_struct *vma;
+
+ down_read(&gmap->mm->mmap_sem);
+ for (gaddr = from; gaddr < to;
+ gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (!vmaddr)
+ continue;
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Find vma in the parent mm */
+ vma = find_vma(gmap->mm, vmaddr);
+ size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
+ zap_page_range(vma, vmaddr, size, NULL);
+ }
+ up_read(&gmap->mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(gmap_discard);
+
+static LIST_HEAD(gmap_notifier_list);
+static DEFINE_SPINLOCK(gmap_notifier_lock);
+
+/**
+ * gmap_register_ipte_notifier - register a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_register_ipte_notifier(struct gmap_notifier *nb)
+{
+ spin_lock(&gmap_notifier_lock);
+ list_add(&nb->list, &gmap_notifier_list);
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
+
+/**
+ * gmap_unregister_ipte_notifier - remove a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
+{
+ spin_lock(&gmap_notifier_lock);
+ list_del_init(&nb->list);
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
+
+/**
+ * gmap_ipte_notify - mark a range of ptes for invalidation notification
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @len: size of area
+ *
+ * Returns 0 if for each page in the given range a gmap mapping exists and
+ * the invalidation notification could be set. If the gmap mapping is missing
+ * for one or more pages -EFAULT is returned. If no memory could be allocated
+ * -ENOMEM is returned. This function establishes missing page table entries.
+ */
+int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
+{
+ unsigned long addr;
+ spinlock_t *ptl;
+ pte_t *ptep;
+ bool unlocked;
+ int rc = 0;
+
+ if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
+ return -EINVAL;
+ down_read(&gmap->mm->mmap_sem);
+ while (len) {
+ unlocked = false;
+ /* Convert gmap address and connect the page tables */
+ addr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(addr)) {
+ rc = addr;
+ break;
+ }
+ /* Get the page mapped */
+ if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
+ &unlocked)) {
+ rc = -EFAULT;
+ break;
+ }
+ /* While trying to map mmap_sem got unlocked. Let us retry */
+ if (unlocked)
+ continue;
+ rc = __gmap_link(gmap, gaddr, addr);
+ if (rc)
+ break;
+ /* Walk the process page table, lock and get pte pointer */
+ ptep = get_locked_pte(gmap->mm, addr, &ptl);
+ VM_BUG_ON(!ptep);
+ /* Set notification bit in the pgste of the pte */
+ if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
+ ptep_set_notify(gmap->mm, addr, ptep);
+ gaddr += PAGE_SIZE;
+ len -= PAGE_SIZE;
+ }
+ pte_unmap_unlock(ptep, ptl);
+ }
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_ipte_notify);
+
+/**
+ * ptep_notify - call all invalidation callbacks for a specific pte.
+ * @mm: pointer to the process mm_struct
+ * @addr: virtual address in the process address space
+ * @pte: pointer to the page table entry
+ *
+ * This function is assumed to be called with the page table lock held
+ * for the pte to notify.
+ */
+void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
+{
+ unsigned long offset, gaddr;
+ unsigned long *table;
+ struct gmap_notifier *nb;
+ struct gmap *gmap;
+
+ offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+ offset = offset * (4096 / sizeof(pte_t));
+ spin_lock(&gmap_notifier_lock);
+ list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+ table = radix_tree_lookup(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (!table)
+ continue;
+ gaddr = __gmap_segment_gaddr(table) + offset;
+ list_for_each_entry(nb, &gmap_notifier_list, list)
+ nb->notifier_call(gmap, gaddr);
+ }
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(ptep_notify);
+
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ struct vm_area_struct *vma;
+ unsigned long addr;
+
+ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for (addr = vma->vm_start;
+ addr < vma->vm_end;
+ addr += PAGE_SIZE)
+ follow_page(vma, addr, FOLL_SPLIT);
+ vma->vm_flags &= ~VM_HUGEPAGE;
+ vma->vm_flags |= VM_NOHUGEPAGE;
+ }
+ mm->def_flags |= VM_NOHUGEPAGE;
+#endif
+}
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+ struct mm_struct *mm = current->mm;
+
+ /* Do we have pgstes? if yes, we are done */
+ if (mm_has_pgste(mm))
+ return 0;
+ /* Fail if the page tables are 2K */
+ if (!mm_alloc_pgste(mm))
+ return -EINVAL;
+ down_write(&mm->mmap_sem);
+ mm->context.has_pgste = 1;
+ /* split thp mappings and disable thp for future mappings */
+ thp_split_mm(mm);
+ up_write(&mm->mmap_sem);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
+
+/*
+ * Enable storage key handling from now on and initialize the storage
+ * keys with the default key.
+ */
+static int __s390_enable_skey(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ /*
+ * Remove all zero page mappings,
+ * after establishing a policy to forbid zero page mappings
+ * following faults for that page will get fresh anonymous pages
+ */
+ if (is_zero_pfn(pte_pfn(*pte)))
+ ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
+ /* Clear storage key */
+ ptep_zap_key(walk->mm, addr, pte);
+ return 0;
+}
+
+int s390_enable_skey(void)
+{
+ struct mm_walk walk = { .pte_entry = __s390_enable_skey };
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc = 0;
+
+ down_write(&mm->mmap_sem);
+ if (mm_use_skey(mm))
+ goto out_up;
+
+ mm->context.use_skey = 1;
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ MADV_UNMERGEABLE, &vma->vm_flags)) {
+ mm->context.use_skey = 0;
+ rc = -ENOMEM;
+ goto out_up;
+ }
+ }
+ mm->def_flags &= ~VM_MERGEABLE;
+
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+
+out_up:
+ up_write(&mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_skey);
+
+/*
+ * Reset CMMA state, make all pages stable again.
+ */
+static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ ptep_zap_unused(walk->mm, addr, pte, 1);
+ return 0;
+}
+
+void s390_reset_cmma(struct mm_struct *mm)
+{
+ struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
+
+ down_write(&mm->mmap_sem);
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+ up_write(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(s390_reset_cmma);
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 12bbf0e8478f..a8a6765f1a51 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -20,9 +20,9 @@
static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
+ struct page *head, *page;
unsigned long mask;
pte_t *ptep, pte;
- struct page *page;
mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
@@ -37,12 +37,14 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
return 0;
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
return 0;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- put_page(page);
+ put_page(head);
return 0;
}
+ VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
(*nr)++;
@@ -55,7 +57,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask, result;
- struct page *head, *page, *tail;
+ struct page *head, *page;
int refs;
result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
@@ -67,7 +69,6 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
refs = 0;
head = pmd_page(pmd);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- tail = page;
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
@@ -88,16 +89,6 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
return 0;
}
- /*
- * Any tail page need their mapcount reference taken before we
- * return.
- */
- while (refs--) {
- if (PageTail(tail))
- get_huge_page_tail(tail);
- tail++;
- }
-
return 1;
}
@@ -116,16 +107,7 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
pmd = *pmdp;
barrier();
next = pmd_addr_end(addr, end);
- /*
- * The pmd_trans_splitting() check below explains why
- * pmdp_splitting_flush() has to serialize with
- * smp_call_function() against our disabled IRQs, to stop
- * this gup-fast code from running while we set the
- * splitting bit in the pmd. Returning zero will take
- * the slow path that will call wait_split_huge_page()
- * if the pmd is still in splitting state.
- */
- if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+ if (pmd_none(pmd))
return 0;
if (unlikely(pmd_large(pmd))) {
/*
@@ -230,9 +212,9 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
- struct mm_struct *mm = current->mm;
int nr, ret;
+ might_sleep();
start &= PAGE_MASK;
nr = __get_user_pages_fast(start, nr_pages, write, pages);
if (nr == nr_pages)
@@ -241,8 +223,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
/* Try to get the remaining pages with get_user_pages */
start += nr << PAGE_SHIFT;
pages += nr;
- ret = get_user_pages_unlocked(current, mm, start,
- nr_pages - nr, write, 0, pages);
+ ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages);
/* Have to be a bit careful with return values */
if (nr > 0)
ret = (ret < 0) ? nr : ret + nr;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index f81096b6940d..1b5e8983f4f3 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -105,11 +105,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
pmd_t *pmdp = (pmd_t *) ptep;
- pte_t pte = huge_ptep_get(ptep);
+ pmd_t old;
- pmdp_flush_direct(mm, addr, pmdp);
- pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
- return pte;
+ old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ return __pmd_to_pte(old);
}
pte_t *huge_pte_alloc(struct mm_struct *mm,
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index c3c07d3505ba..c7b0451397d6 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -48,37 +48,13 @@ EXPORT_SYMBOL(zero_page_mask);
static void __init setup_zero_pages(void)
{
- struct cpuid cpu_id;
unsigned int order;
struct page *page;
int i;
- get_cpu_id(&cpu_id);
- switch (cpu_id.machine) {
- case 0x9672: /* g5 */
- case 0x2064: /* z900 */
- case 0x2066: /* z900 */
- case 0x2084: /* z990 */
- case 0x2086: /* z990 */
- case 0x2094: /* z9-109 */
- case 0x2096: /* z9-109 */
- order = 0;
- break;
- case 0x2097: /* z10 */
- case 0x2098: /* z10 */
- case 0x2817: /* z196 */
- case 0x2818: /* z196 */
- order = 2;
- break;
- case 0x2827: /* zEC12 */
- case 0x2828: /* zEC12 */
- order = 5;
- break;
- case 0x2964: /* z13 */
- default:
- order = 7;
- break;
- }
+ /* Latest machines require a mapping granularity of 512KB */
+ order = 7;
+
/* Limit number of empty zero pages for small memory sizes */
while (order > 2 && (totalram_pages >> 10) < (1UL << order))
order--;
@@ -122,7 +98,7 @@ void __init paging_init(void)
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
__ctl_load(S390_lowcore.kernel_asce, 13, 13);
- arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
+ __arch_local_irq_stosm(0x04);
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
@@ -132,6 +108,13 @@ void __init paging_init(void)
free_area_init_nodes(max_zone_pfns);
}
+void mark_rodata_ro(void)
+{
+ /* Text and rodata are already protected. Nothing to do here. */
+ pr_info("Write protecting the kernel read-only data: %luk\n",
+ ((unsigned long)&_eshared - (unsigned long)&_stext) >> 10);
+}
+
void __init mem_init(void)
{
if (MACHINE_HAS_TLB_LC)
@@ -150,9 +133,6 @@ void __init mem_init(void)
setup_zero_pages(); /* Setup zeroed pages. */
mem_init_print_info(NULL);
- printk("Write protected kernel read-only data: %#lx - %#lx\n",
- (unsigned long)&_stext,
- PFN_ALIGN((unsigned long)&_eshared) - 1);
}
void free_initmem(void)
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 8a993a53fcd6..792f9c63fbca 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -93,15 +93,19 @@ static int __memcpy_real(void *dest, void *src, size_t count)
*/
int memcpy_real(void *dest, void *src, size_t count)
{
+ int irqs_disabled, rc;
unsigned long flags;
- int rc;
if (!count)
return 0;
- local_irq_save(flags);
- __arch_local_irq_stnsm(0xfbUL);
+ flags = __arch_local_irq_stnsm(0xf8UL);
+ irqs_disabled = arch_irqs_disabled_flags(flags);
+ if (!irqs_disabled)
+ trace_hardirqs_off();
rc = __memcpy_real(dest, src, count);
- local_irq_restore(flags);
+ if (!irqs_disabled)
+ trace_hardirqs_on();
+ __arch_local_irq_ssm(flags);
return rc;
}
@@ -163,11 +167,11 @@ static int is_swapped(unsigned long addr)
unsigned long lc;
int cpu;
- if (addr < sizeof(struct _lowcore))
+ if (addr < sizeof(struct lowcore))
return 1;
for_each_online_cpu(cpu) {
lc = (unsigned long) lowcore_ptr[cpu];
- if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc)
+ if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc)
continue;
return 1;
}
diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c
index e00f0d5d296d..d612cc3eec6a 100644
--- a/arch/s390/mm/mem_detect.c
+++ b/arch/s390/mm/mem_detect.c
@@ -14,8 +14,6 @@
#include <asm/sclp.h>
#include <asm/setup.h>
-#define ADDR2G (1ULL << 31)
-
#define CHUNK_READ_WRITE 0
#define CHUNK_READ_ONLY 1
@@ -27,15 +25,14 @@ static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size)
void __init detect_memory_memblock(void)
{
- unsigned long long memsize, rnmax, rzm;
- unsigned long addr, size;
+ unsigned long memsize, rnmax, rzm, addr, size;
int type;
rzm = sclp.rzm;
rnmax = sclp.rnmax;
memsize = rzm * rnmax;
if (!rzm)
- rzm = 1ULL << 17;
+ rzm = 1UL << 17;
max_physmem_end = memsize;
addr = 0;
/* keep memblock lists close to the kernel */
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 6e552af08c76..45c4daa49930 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -31,9 +31,6 @@
#include <linux/security.h>
#include <asm/pgalloc.h>
-unsigned long mmap_rnd_mask;
-static unsigned long mmap_align_mask;
-
static unsigned long stack_maxrandom_size(void)
{
if (!(current->flags & PF_RANDOMIZE))
@@ -62,10 +59,7 @@ static inline int mmap_is_legacy(void)
unsigned long arch_mmap_rnd(void)
{
- if (is_32bit_task())
- return (get_random_int() & 0x7ff) << PAGE_SHIFT;
- else
- return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT;
+ return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT;
}
static unsigned long mmap_base_legacy(unsigned long rnd)
@@ -92,7 +86,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_unmapped_area_info info;
- int do_color_align;
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
@@ -108,15 +101,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
}
- do_color_align = 0;
- if (filp || (flags & MAP_SHARED))
- do_color_align = !is_32bit_task();
-
info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
- info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0;
+ if (filp || (flags & MAP_SHARED))
+ info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+ else
+ info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
return vm_unmapped_area(&info);
}
@@ -130,7 +122,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
- int do_color_align;
/* requested length too big for entire address space */
if (len > TASK_SIZE - mmap_min_addr)
@@ -148,15 +139,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
return addr;
}
- do_color_align = 0;
- if (filp || (flags & MAP_SHARED))
- do_color_align = !is_32bit_task();
-
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
info.high_limit = mm->mmap_base;
- info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0;
+ if (filp || (flags & MAP_SHARED))
+ info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+ else
+ info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
@@ -179,12 +169,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
{
- if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
+ if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
return 0;
if (!(flags & MAP_FIXED))
addr = 0;
if ((addr + len) >= TASK_SIZE)
- return crst_table_upgrade(current->mm, 1UL << 53);
+ return crst_table_upgrade(current->mm, TASK_MAX_SIZE);
return 0;
}
@@ -199,9 +189,9 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
if (!(area & ~PAGE_MASK))
return area;
- if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+ if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
/* Upgrade the page table to 4 levels and retry. */
- rc = crst_table_upgrade(mm, 1UL << 53);
+ rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
if (rc)
return (unsigned long) rc;
area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
@@ -221,9 +211,9 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
if (!(area & ~PAGE_MASK))
return area;
- if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+ if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
/* Upgrade the page table to 4 levels and retry. */
- rc = crst_table_upgrade(mm, 1UL << 53);
+ rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
if (rc)
return (unsigned long) rc;
area = arch_get_unmapped_area_topdown(filp, addr, len,
@@ -254,35 +244,3 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
mm->get_unmapped_area = s390_get_unmapped_area_topdown;
}
}
-
-static int __init setup_mmap_rnd(void)
-{
- struct cpuid cpu_id;
-
- get_cpu_id(&cpu_id);
- switch (cpu_id.machine) {
- case 0x9672:
- case 0x2064:
- case 0x2066:
- case 0x2084:
- case 0x2086:
- case 0x2094:
- case 0x2096:
- case 0x2097:
- case 0x2098:
- case 0x2817:
- case 0x2818:
- case 0x2827:
- case 0x2828:
- mmap_rnd_mask = 0x7ffUL;
- mmap_align_mask = 0UL;
- break;
- case 0x2964: /* z13 */
- default:
- mmap_rnd_mask = 0x3ff80UL;
- mmap_align_mask = 0x7fUL;
- break;
- }
- return 0;
-}
-early_initcall(setup_mmap_rnd);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 749c98407b41..f2a5c29a97e9 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -65,19 +65,17 @@ static pte_t *walk_page_table(unsigned long addr)
static void change_page_attr(unsigned long addr, int numpages,
pte_t (*set) (pte_t))
{
- pte_t *ptep, pte;
+ pte_t *ptep;
int i;
for (i = 0; i < numpages; i++) {
ptep = walk_page_table(addr);
if (WARN_ON_ONCE(!ptep))
break;
- pte = *ptep;
- pte = set(pte);
- __ptep_ipte(addr, ptep);
- *ptep = pte;
+ *ptep = set(*ptep);
addr += PAGE_SIZE;
}
+ __tlb_flush_kernel();
}
int set_memory_ro(unsigned long addr, int numpages)
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
new file mode 100644
index 000000000000..f6c3de26cda8
--- /dev/null
+++ b/arch/s390/mm/pgalloc.c
@@ -0,0 +1,360 @@
+/*
+ * Page table allocation functions
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_PGSTE
+
+static int page_table_allocate_pgste_min = 0;
+static int page_table_allocate_pgste_max = 1;
+int page_table_allocate_pgste = 0;
+EXPORT_SYMBOL(page_table_allocate_pgste);
+
+static struct ctl_table page_table_sysctl[] = {
+ {
+ .procname = "allocate_pgste",
+ .data = &page_table_allocate_pgste,
+ .maxlen = sizeof(int),
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = proc_dointvec,
+ .extra1 = &page_table_allocate_pgste_min,
+ .extra2 = &page_table_allocate_pgste_max,
+ },
+ { }
+};
+
+static struct ctl_table page_table_sysctl_dir[] = {
+ {
+ .procname = "vm",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = page_table_sysctl,
+ },
+ { }
+};
+
+static int __init page_table_register_sysctl(void)
+{
+ return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
+}
+__initcall(page_table_register_sysctl);
+
+#endif /* CONFIG_PGSTE */
+
+unsigned long *crst_table_alloc(struct mm_struct *mm)
+{
+ struct page *page = alloc_pages(GFP_KERNEL, 2);
+
+ if (!page)
+ return NULL;
+ return (unsigned long *) page_to_phys(page);
+}
+
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
+{
+ free_pages((unsigned long) table, 2);
+}
+
+static void __crst_table_upgrade(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ if (current->active_mm == mm) {
+ clear_user_asce();
+ set_user_asce(mm);
+ }
+ __tlb_flush_local();
+}
+
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+{
+ unsigned long *table, *pgd;
+ unsigned long entry;
+ int flush;
+
+ BUG_ON(limit > TASK_MAX_SIZE);
+ flush = 0;
+repeat:
+ table = crst_table_alloc(mm);
+ if (!table)
+ return -ENOMEM;
+ spin_lock_bh(&mm->page_table_lock);
+ if (mm->context.asce_limit < limit) {
+ pgd = (unsigned long *) mm->pgd;
+ if (mm->context.asce_limit <= (1UL << 31)) {
+ entry = _REGION3_ENTRY_EMPTY;
+ mm->context.asce_limit = 1UL << 42;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION3;
+ } else {
+ entry = _REGION2_ENTRY_EMPTY;
+ mm->context.asce_limit = 1UL << 53;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION2;
+ }
+ crst_table_init(table, entry);
+ pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
+ mm->pgd = (pgd_t *) table;
+ mm->task_size = mm->context.asce_limit;
+ table = NULL;
+ flush = 1;
+ }
+ spin_unlock_bh(&mm->page_table_lock);
+ if (table)
+ crst_table_free(mm, table);
+ if (mm->context.asce_limit < limit)
+ goto repeat;
+ if (flush)
+ on_each_cpu(__crst_table_upgrade, mm, 0);
+ return 0;
+}
+
+void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+{
+ pgd_t *pgd;
+
+ if (current->active_mm == mm) {
+ clear_user_asce();
+ __tlb_flush_mm(mm);
+ }
+ while (mm->context.asce_limit > limit) {
+ pgd = mm->pgd;
+ switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
+ case _REGION_ENTRY_TYPE_R2:
+ mm->context.asce_limit = 1UL << 42;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION3;
+ break;
+ case _REGION_ENTRY_TYPE_R3:
+ mm->context.asce_limit = 1UL << 31;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_SEGMENT;
+ break;
+ default:
+ BUG();
+ }
+ mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+ mm->task_size = mm->context.asce_limit;
+ crst_table_free(mm, (unsigned long *) pgd);
+ }
+ if (current->active_mm == mm)
+ set_user_asce(mm);
+}
+
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+ unsigned int old, new;
+
+ do {
+ old = atomic_read(v);
+ new = old ^ bits;
+ } while (atomic_cmpxchg(v, old, new) != old);
+ return new;
+}
+
+/*
+ * page table entry allocation/free routines.
+ */
+unsigned long *page_table_alloc(struct mm_struct *mm)
+{
+ unsigned long *table;
+ struct page *page;
+ unsigned int mask, bit;
+
+ /* Try to get a fragment of a 4K page as a 2K page table */
+ if (!mm_alloc_pgste(mm)) {
+ table = NULL;
+ spin_lock_bh(&mm->context.list_lock);
+ if (!list_empty(&mm->context.pgtable_list)) {
+ page = list_first_entry(&mm->context.pgtable_list,
+ struct page, lru);
+ mask = atomic_read(&page->_mapcount);
+ mask = (mask | (mask >> 4)) & 3;
+ if (mask != 3) {
+ table = (unsigned long *) page_to_phys(page);
+ bit = mask & 1; /* =1 -> second 2K */
+ if (bit)
+ table += PTRS_PER_PTE;
+ atomic_xor_bits(&page->_mapcount, 1U << bit);
+ list_del(&page->lru);
+ }
+ }
+ spin_unlock_bh(&mm->context.list_lock);
+ if (table)
+ return table;
+ }
+ /* Allocate a fresh page */
+ page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!page)
+ return NULL;
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ /* Initialize page table */
+ table = (unsigned long *) page_to_phys(page);
+ if (mm_alloc_pgste(mm)) {
+ /* Return 4K page table with PGSTEs */
+ atomic_set(&page->_mapcount, 3);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+ clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+ } else {
+ /* Return the first 2K fragment of the page */
+ atomic_set(&page->_mapcount, 1);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE);
+ spin_lock_bh(&mm->context.list_lock);
+ list_add(&page->lru, &mm->context.pgtable_list);
+ spin_unlock_bh(&mm->context.list_lock);
+ }
+ return table;
+}
+
+void page_table_free(struct mm_struct *mm, unsigned long *table)
+{
+ struct page *page;
+ unsigned int bit, mask;
+
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (!mm_alloc_pgste(mm)) {
+ /* Free 2K page table fragment of a 4K page */
+ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+ spin_lock_bh(&mm->context.list_lock);
+ mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+ if (mask & 3)
+ list_add(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
+ spin_unlock_bh(&mm->context.list_lock);
+ if (mask != 0)
+ return;
+ }
+
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
+}
+
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
+ unsigned long vmaddr)
+{
+ struct mm_struct *mm;
+ struct page *page;
+ unsigned int bit, mask;
+
+ mm = tlb->mm;
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (mm_alloc_pgste(mm)) {
+ gmap_unlink(mm, table, vmaddr);
+ table = (unsigned long *) (__pa(table) | 3);
+ tlb_remove_table(tlb, table);
+ return;
+ }
+ bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
+ spin_lock_bh(&mm->context.list_lock);
+ mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+ if (mask & 3)
+ list_add_tail(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
+ spin_unlock_bh(&mm->context.list_lock);
+ table = (unsigned long *) (__pa(table) | (1U << bit));
+ tlb_remove_table(tlb, table);
+}
+
+static void __tlb_remove_table(void *_table)
+{
+ unsigned int mask = (unsigned long) _table & 3;
+ void *table = (void *)((unsigned long) _table ^ mask);
+ struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+
+ switch (mask) {
+ case 0: /* pmd or pud */
+ free_pages((unsigned long) table, 2);
+ break;
+ case 1: /* lower 2K of a 4K page table */
+ case 2: /* higher 2K of a 4K page table */
+ if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+ break;
+ /* fallthrough */
+ case 3: /* 4K page table with pgstes */
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
+ break;
+ }
+}
+
+static void tlb_remove_table_smp_sync(void *arg)
+{
+ /* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+ /*
+ * This isn't an RCU grace period and hence the page-tables cannot be
+ * assumed to be actually RCU-freed.
+ *
+ * It is however sufficient for software page-table walkers that rely
+ * on IRQ disabling. See the comment near struct mmu_table_batch.
+ */
+ smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+ __tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+ struct mmu_table_batch *batch;
+ int i;
+
+ batch = container_of(head, struct mmu_table_batch, rcu);
+
+ for (i = 0; i < batch->nr; i++)
+ __tlb_remove_table(batch->tables[i]);
+
+ free_page((unsigned long)batch);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ if (*batch) {
+ call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
+ *batch = NULL;
+ }
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ tlb->mm->context.flush_mm = 1;
+ if (*batch == NULL) {
+ *batch = (struct mmu_table_batch *)
+ __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+ if (*batch == NULL) {
+ __tlb_flush_mm_lazy(tlb->mm);
+ tlb_remove_table_one(table);
+ return;
+ }
+ (*batch)->nr = 0;
+ }
+ (*batch)->tables[(*batch)->nr++] = table;
+ if ((*batch)->nr == MAX_TABLE_BATCH)
+ tlb_flush_mmu(tlb);
+}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 54ef3bc01b43..4324b87f9398 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,799 +24,501 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-unsigned long *crst_table_alloc(struct mm_struct *mm)
-{
- struct page *page = alloc_pages(GFP_KERNEL, 2);
-
- if (!page)
- return NULL;
- return (unsigned long *) page_to_phys(page);
+static inline pte_t ptep_flush_direct(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ int active, count;
+ pte_t old;
+
+ old = *ptep;
+ if (unlikely(pte_val(old) & _PAGE_INVALID))
+ return old;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __ptep_ipte_local(addr, ptep);
+ else
+ __ptep_ipte(addr, ptep);
+ atomic_sub(0x10000, &mm->context.attach_count);
+ return old;
}
-void crst_table_free(struct mm_struct *mm, unsigned long *table)
+static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
- free_pages((unsigned long) table, 2);
+ int active, count;
+ pte_t old;
+
+ old = *ptep;
+ if (unlikely(pte_val(old) & _PAGE_INVALID))
+ return old;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if ((count & 0xffff) <= active) {
+ pte_val(*ptep) |= _PAGE_INVALID;
+ mm->context.flush_mm = 1;
+ } else
+ __ptep_ipte(addr, ptep);
+ atomic_sub(0x10000, &mm->context.attach_count);
+ return old;
}
-static void __crst_table_upgrade(void *arg)
+static inline pgste_t pgste_get_lock(pte_t *ptep)
{
- struct mm_struct *mm = arg;
+ unsigned long new = 0;
+#ifdef CONFIG_PGSTE
+ unsigned long old;
- if (current->active_mm == mm) {
- clear_user_asce();
- set_user_asce(mm);
- }
- __tlb_flush_local();
+ preempt_disable();
+ asm(
+ " lg %0,%2\n"
+ "0: lgr %1,%0\n"
+ " nihh %0,0xff7f\n" /* clear PCL bit in old */
+ " oihh %1,0x0080\n" /* set PCL bit in new */
+ " csg %0,%1,%2\n"
+ " jl 0b\n"
+ : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
+ : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
+#endif
+ return __pgste(new);
}
-int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
- unsigned long *table, *pgd;
- unsigned long entry;
- int flush;
-
- BUG_ON(limit > (1UL << 53));
- flush = 0;
-repeat:
- table = crst_table_alloc(mm);
- if (!table)
- return -ENOMEM;
- spin_lock_bh(&mm->page_table_lock);
- if (mm->context.asce_limit < limit) {
- pgd = (unsigned long *) mm->pgd;
- if (mm->context.asce_limit <= (1UL << 31)) {
- entry = _REGION3_ENTRY_EMPTY;
- mm->context.asce_limit = 1UL << 42;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_REGION3;
- } else {
- entry = _REGION2_ENTRY_EMPTY;
- mm->context.asce_limit = 1UL << 53;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_REGION2;
- }
- crst_table_init(table, entry);
- pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
- mm->pgd = (pgd_t *) table;
- mm->task_size = mm->context.asce_limit;
- table = NULL;
- flush = 1;
- }
- spin_unlock_bh(&mm->page_table_lock);
- if (table)
- crst_table_free(mm, table);
- if (mm->context.asce_limit < limit)
- goto repeat;
- if (flush)
- on_each_cpu(__crst_table_upgrade, mm, 0);
- return 0;
+#ifdef CONFIG_PGSTE
+ asm(
+ " nihh %1,0xff7f\n" /* clear PCL bit */
+ " stg %1,%0\n"
+ : "=Q" (ptep[PTRS_PER_PTE])
+ : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
+ : "cc", "memory");
+ preempt_enable();
+#endif
}
-void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+static inline pgste_t pgste_get(pte_t *ptep)
{
- pgd_t *pgd;
-
- if (current->active_mm == mm) {
- clear_user_asce();
- __tlb_flush_mm(mm);
- }
- while (mm->context.asce_limit > limit) {
- pgd = mm->pgd;
- switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
- case _REGION_ENTRY_TYPE_R2:
- mm->context.asce_limit = 1UL << 42;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_REGION3;
- break;
- case _REGION_ENTRY_TYPE_R3:
- mm->context.asce_limit = 1UL << 31;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_SEGMENT;
- break;
- default:
- BUG();
- }
- mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
- mm->task_size = mm->context.asce_limit;
- crst_table_free(mm, (unsigned long *) pgd);
- }
- if (current->active_mm == mm)
- set_user_asce(mm);
+ unsigned long pgste = 0;
+#ifdef CONFIG_PGSTE
+ pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
+#endif
+ return __pgste(pgste);
}
+static inline void pgste_set(pte_t *ptep, pgste_t pgste)
+{
#ifdef CONFIG_PGSTE
+ *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
+#endif
+}
-/**
- * gmap_alloc - allocate a guest address space
- * @mm: pointer to the parent mm_struct
- * @limit: maximum size of the gmap address space
- *
- * Returns a guest address space structure.
- */
-struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
+static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
+ struct mm_struct *mm)
{
- struct gmap *gmap;
- struct page *page;
- unsigned long *table;
- unsigned long etype, atype;
-
- if (limit < (1UL << 31)) {
- limit = (1UL << 31) - 1;
- atype = _ASCE_TYPE_SEGMENT;
- etype = _SEGMENT_ENTRY_EMPTY;
- } else if (limit < (1UL << 42)) {
- limit = (1UL << 42) - 1;
- atype = _ASCE_TYPE_REGION3;
- etype = _REGION3_ENTRY_EMPTY;
- } else if (limit < (1UL << 53)) {
- limit = (1UL << 53) - 1;
- atype = _ASCE_TYPE_REGION2;
- etype = _REGION2_ENTRY_EMPTY;
- } else {
- limit = -1UL;
- atype = _ASCE_TYPE_REGION1;
- etype = _REGION1_ENTRY_EMPTY;
- }
- gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
- if (!gmap)
- goto out;
- INIT_LIST_HEAD(&gmap->crst_list);
- INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
- INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
- spin_lock_init(&gmap->guest_table_lock);
- gmap->mm = mm;
- page = alloc_pages(GFP_KERNEL, 2);
- if (!page)
- goto out_free;
- page->index = 0;
- list_add(&page->lru, &gmap->crst_list);
- table = (unsigned long *) page_to_phys(page);
- crst_table_init(table, etype);
- gmap->table = table;
- gmap->asce = atype | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | __pa(table);
- gmap->asce_end = limit;
- down_write(&mm->mmap_sem);
- list_add(&gmap->list, &mm->context.gmap_list);
- up_write(&mm->mmap_sem);
- return gmap;
-
-out_free:
- kfree(gmap);
-out:
- return NULL;
+#ifdef CONFIG_PGSTE
+ unsigned long address, bits, skey;
+
+ if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
+ return pgste;
+ address = pte_val(pte) & PAGE_MASK;
+ skey = (unsigned long) page_get_storage_key(address);
+ bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+ /* Transfer page changed & referenced bit to guest bits in pgste */
+ pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
+ /* Copy page access key and fetch protection bit to pgste */
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+#endif
+ return pgste;
+
}
-EXPORT_SYMBOL_GPL(gmap_alloc);
-static void gmap_flush_tlb(struct gmap *gmap)
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+ struct mm_struct *mm)
{
- if (MACHINE_HAS_IDTE)
- __tlb_flush_asce(gmap->mm, gmap->asce);
- else
- __tlb_flush_global();
+#ifdef CONFIG_PGSTE
+ unsigned long address;
+ unsigned long nkey;
+
+ if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
+ return;
+ VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
+ address = pte_val(entry) & PAGE_MASK;
+ /*
+ * Set page access key and fetch protection bit from pgste.
+ * The guest C/R information is still in the PGSTE, set real
+ * key C/R to 0.
+ */
+ nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+ nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
+ page_set_storage_key(address, nkey, 0);
+#endif
}
-static void gmap_radix_tree_free(struct radix_tree_root *root)
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
- struct radix_tree_iter iter;
- unsigned long indices[16];
- unsigned long index;
- void **slot;
- int i, nr;
-
- /* A radix tree is freed by deleting all of its entries */
- index = 0;
- do {
- nr = 0;
- radix_tree_for_each_slot(slot, root, &iter, index) {
- indices[nr] = iter.index;
- if (++nr == 16)
- break;
- }
- for (i = 0; i < nr; i++) {
- index = indices[i];
- radix_tree_delete(root, index);
+#ifdef CONFIG_PGSTE
+ if ((pte_val(entry) & _PAGE_PRESENT) &&
+ (pte_val(entry) & _PAGE_WRITE) &&
+ !(pte_val(entry) & _PAGE_INVALID)) {
+ if (!MACHINE_HAS_ESOP) {
+ /*
+ * Without enhanced suppression-on-protection force
+ * the dirty bit on for all writable ptes.
+ */
+ pte_val(entry) |= _PAGE_DIRTY;
+ pte_val(entry) &= ~_PAGE_PROTECT;
}
- } while (nr > 0);
+ if (!(pte_val(entry) & _PAGE_PROTECT))
+ /* This pte allows write access, set user-dirty */
+ pgste_val(pgste) |= PGSTE_UC_BIT;
+ }
+#endif
+ *ptep = entry;
+ return pgste;
}
-/**
- * gmap_free - free a guest address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_free(struct gmap *gmap)
+static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep, pgste_t pgste)
{
- struct page *page, *next;
-
- /* Flush tlb. */
- if (MACHINE_HAS_IDTE)
- __tlb_flush_asce(gmap->mm, gmap->asce);
- else
- __tlb_flush_global();
-
- /* Free all segment & region tables. */
- list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, 2);
- gmap_radix_tree_free(&gmap->guest_to_host);
- gmap_radix_tree_free(&gmap->host_to_guest);
- down_write(&gmap->mm->mmap_sem);
- list_del(&gmap->list);
- up_write(&gmap->mm->mmap_sem);
- kfree(gmap);
+#ifdef CONFIG_PGSTE
+ if (pgste_val(pgste) & PGSTE_IN_BIT) {
+ pgste_val(pgste) &= ~PGSTE_IN_BIT;
+ ptep_notify(mm, addr, ptep);
+ }
+#endif
+ return pgste;
}
-EXPORT_SYMBOL_GPL(gmap_free);
-/**
- * gmap_enable - switch primary space to the guest address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_enable(struct gmap *gmap)
+static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
- S390_lowcore.gmap = (unsigned long) gmap;
+ pgste_t pgste = __pgste(0);
+
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+ }
+ return pgste;
}
-EXPORT_SYMBOL_GPL(gmap_enable);
-/**
- * gmap_disable - switch back to the standard primary address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_disable(struct gmap *gmap)
+static inline void ptep_xchg_commit(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ pgste_t pgste, pte_t old, pte_t new)
{
- S390_lowcore.gmap = 0UL;
+ if (mm_has_pgste(mm)) {
+ if (pte_val(old) & _PAGE_INVALID)
+ pgste_set_key(ptep, pgste, new, mm);
+ if (pte_val(new) & _PAGE_INVALID) {
+ pgste = pgste_update_all(old, pgste, mm);
+ if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
+ _PGSTE_GPS_USAGE_UNUSED)
+ pte_val(old) |= _PAGE_UNUSED;
+ }
+ pgste = pgste_set_pte(ptep, pgste, new);
+ pgste_set_unlock(ptep, pgste);
+ } else {
+ *ptep = new;
+ }
}
-EXPORT_SYMBOL_GPL(gmap_disable);
-/*
- * gmap_alloc_table is assumed to be called with mmap_sem held
- */
-static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
- unsigned long init, unsigned long gaddr)
+pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t new)
{
- struct page *page;
- unsigned long *new;
-
- /* since we dont free the gmap table until gmap_free we can unlock */
- page = alloc_pages(GFP_KERNEL, 2);
- if (!page)
- return -ENOMEM;
- new = (unsigned long *) page_to_phys(page);
- crst_table_init(new, init);
- spin_lock(&gmap->mm->page_table_lock);
- if (*table & _REGION_ENTRY_INVALID) {
- list_add(&page->lru, &gmap->crst_list);
- *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
- (*table & _REGION_ENTRY_TYPE_MASK);
- page->index = gaddr;
- page = NULL;
- }
- spin_unlock(&gmap->mm->page_table_lock);
- if (page)
- __free_pages(page, 2);
- return 0;
+ pgste_t pgste;
+ pte_t old;
+
+ pgste = ptep_xchg_start(mm, addr, ptep);
+ old = ptep_flush_direct(mm, addr, ptep);
+ ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+ return old;
}
+EXPORT_SYMBOL(ptep_xchg_direct);
-/**
- * __gmap_segment_gaddr - find virtual address from segment pointer
- * @entry: pointer to a segment table entry in the guest address space
- *
- * Returns the virtual address in the guest address space for the segment
- */
-static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t new)
{
- struct page *page;
- unsigned long offset, mask;
-
- offset = (unsigned long) entry / sizeof(unsigned long);
- offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
- mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
- page = virt_to_page((void *)((unsigned long) entry & mask));
- return page->index + offset;
+ pgste_t pgste;
+ pte_t old;
+
+ pgste = ptep_xchg_start(mm, addr, ptep);
+ old = ptep_flush_lazy(mm, addr, ptep);
+ ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+ return old;
}
+EXPORT_SYMBOL(ptep_xchg_lazy);
-/**
- * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
- * @gmap: pointer to the guest address space structure
- * @vmaddr: address in the host process address space
- *
- * Returns 1 if a TLB flush is required
- */
-static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- unsigned long *entry;
- int flush = 0;
-
- spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
- if (entry) {
- flush = (*entry != _SEGMENT_ENTRY_INVALID);
- *entry = _SEGMENT_ENTRY_INVALID;
+ pgste_t pgste;
+ pte_t old;
+
+ pgste = ptep_xchg_start(mm, addr, ptep);
+ old = ptep_flush_lazy(mm, addr, ptep);
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_update_all(old, pgste, mm);
+ pgste_set(ptep, pgste);
}
- spin_unlock(&gmap->guest_table_lock);
- return flush;
+ return old;
}
+EXPORT_SYMBOL(ptep_modify_prot_start);
-/**
- * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
- * @gmap: pointer to the guest address space structure
- * @gaddr: address in the guest address space
- *
- * Returns 1 if a TLB flush is required
- */
-static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
{
- unsigned long vmaddr;
+ pgste_t pgste;
- vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get(ptep);
+ pgste_set_key(ptep, pgste, pte, mm);
+ pgste = pgste_set_pte(ptep, pgste, pte);
+ pgste_set_unlock(ptep, pgste);
+ } else {
+ *ptep = pte;
+ }
}
+EXPORT_SYMBOL(ptep_modify_prot_commit);
-/**
- * gmap_unmap_segment - unmap segment from the guest address space
- * @gmap: pointer to the guest address space structure
- * @to: address in the guest address space
- * @len: length of the memory area to unmap
- *
- * Returns 0 if the unmap succeeded, -EINVAL if not.
- */
-int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
{
- unsigned long off;
- int flush;
-
- if ((to | len) & (PMD_SIZE - 1))
- return -EINVAL;
- if (len == 0 || to + len < to)
- return -EINVAL;
-
- flush = 0;
- down_write(&gmap->mm->mmap_sem);
- for (off = 0; off < len; off += PMD_SIZE)
- flush |= __gmap_unmap_by_gaddr(gmap, to + off);
- up_write(&gmap->mm->mmap_sem);
- if (flush)
- gmap_flush_tlb(gmap);
- return 0;
+ int active, count;
+ pmd_t old;
+
+ old = *pmdp;
+ if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
+ return old;
+ if (!MACHINE_HAS_IDTE) {
+ __pmdp_csp(pmdp);
+ return old;
+ }
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __pmdp_idte_local(addr, pmdp);
+ else
+ __pmdp_idte(addr, pmdp);
+ atomic_sub(0x10000, &mm->context.attach_count);
+ return old;
+}
+
+static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ int active, count;
+ pmd_t old;
+
+ old = *pmdp;
+ if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
+ return old;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if ((count & 0xffff) <= active) {
+ pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
+ mm->context.flush_mm = 1;
+ } else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(addr, pmdp);
+ else
+ __pmdp_csp(pmdp);
+ atomic_sub(0x10000, &mm->context.attach_count);
+ return old;
}
-EXPORT_SYMBOL_GPL(gmap_unmap_segment);
-
-/**
- * gmap_mmap_segment - map a segment to the guest address space
- * @gmap: pointer to the guest address space structure
- * @from: source address in the parent address space
- * @to: target address in the guest address space
- * @len: length of the memory area to map
- *
- * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
- */
-int gmap_map_segment(struct gmap *gmap, unsigned long from,
- unsigned long to, unsigned long len)
+
+pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t new)
{
- unsigned long off;
- int flush;
-
- if ((from | to | len) & (PMD_SIZE - 1))
- return -EINVAL;
- if (len == 0 || from + len < from || to + len < to ||
- from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
- return -EINVAL;
-
- flush = 0;
- down_write(&gmap->mm->mmap_sem);
- for (off = 0; off < len; off += PMD_SIZE) {
- /* Remove old translation */
- flush |= __gmap_unmap_by_gaddr(gmap, to + off);
- /* Store new translation */
- if (radix_tree_insert(&gmap->guest_to_host,
- (to + off) >> PMD_SHIFT,
- (void *) from + off))
- break;
- }
- up_write(&gmap->mm->mmap_sem);
- if (flush)
- gmap_flush_tlb(gmap);
- if (off >= len)
- return 0;
- gmap_unmap_segment(gmap, to, len);
- return -ENOMEM;
+ pmd_t old;
+
+ old = pmdp_flush_direct(mm, addr, pmdp);
+ *pmdp = new;
+ return old;
}
-EXPORT_SYMBOL_GPL(gmap_map_segment);
-
-/**
- * __gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- * The mmap_sem of the mm that belongs to the address space must be held
- * when this function gets called.
- */
-unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+EXPORT_SYMBOL(pmdp_xchg_direct);
+
+pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t new)
{
- unsigned long vmaddr;
+ pmd_t old;
- vmaddr = (unsigned long)
- radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
- return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
+ old = pmdp_flush_lazy(mm, addr, pmdp);
+ *pmdp = new;
+ return old;
}
-EXPORT_SYMBOL_GPL(__gmap_translate);
-
-/**
- * gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- */
-unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
+EXPORT_SYMBOL(pmdp_xchg_lazy);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
{
- unsigned long rc;
+ struct list_head *lh = (struct list_head *) pgtable;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
- down_read(&gmap->mm->mmap_sem);
- rc = __gmap_translate(gmap, gaddr);
- up_read(&gmap->mm->mmap_sem);
- return rc;
+ /* FIFO */
+ if (!pmd_huge_pte(mm, pmdp))
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+ pmd_huge_pte(mm, pmdp) = pgtable;
}
-EXPORT_SYMBOL_GPL(gmap_translate);
-/**
- * gmap_unlink - disconnect a page table from the gmap shadow tables
- * @gmap: pointer to guest mapping meta data structure
- * @table: pointer to the host page table
- * @vmaddr: vm address associated with the host page table
- */
-static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
- unsigned long vmaddr)
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
- struct gmap *gmap;
- int flush;
+ struct list_head *lh;
+ pgtable_t pgtable;
+ pte_t *ptep;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
- list_for_each_entry(gmap, &mm->context.gmap_list, list) {
- flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
- if (flush)
- gmap_flush_tlb(gmap);
+ /* FIFO */
+ pgtable = pmd_huge_pte(mm, pmdp);
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ pmd_huge_pte(mm, pmdp) = NULL;
+ else {
+ pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+ list_del(lh);
}
+ ptep = (pte_t *) pgtable;
+ pte_val(*ptep) = _PAGE_INVALID;
+ ptep++;
+ pte_val(*ptep) = _PAGE_INVALID;
+ return pgtable;
}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-/**
- * gmap_link - set up shadow page tables to connect a host to a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @vmaddr: vm address
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
- * if the vm address is already mapped to a different guest segment.
- * The mmap_sem of the mm that belongs to the address space must be held
- * when this function gets called.
- */
-int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
+#ifdef CONFIG_PGSTE
+void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry)
{
- struct mm_struct *mm;
- unsigned long *table;
- spinlock_t *ptl;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- int rc;
-
- /* Create higher level tables in the gmap page table */
- table = gmap->table;
- if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
- table += (gaddr >> 53) & 0x7ff;
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
- gaddr & 0xffe0000000000000UL))
- return -ENOMEM;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- }
- if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
- table += (gaddr >> 42) & 0x7ff;
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
- gaddr & 0xfffffc0000000000UL))
- return -ENOMEM;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- }
- if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
- table += (gaddr >> 31) & 0x7ff;
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
- gaddr & 0xffffffff80000000UL))
- return -ENOMEM;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- }
- table += (gaddr >> 20) & 0x7ff;
- /* Walk the parent mm page table */
- mm = gmap->mm;
- pgd = pgd_offset(mm, vmaddr);
- VM_BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd, vmaddr);
- VM_BUG_ON(pud_none(*pud));
- pmd = pmd_offset(pud, vmaddr);
- VM_BUG_ON(pmd_none(*pmd));
- /* large pmds cannot yet be handled */
- if (pmd_large(*pmd))
- return -EFAULT;
- /* Link gmap segment table entry location to page table. */
- rc = radix_tree_preload(GFP_KERNEL);
- if (rc)
- return rc;
- ptl = pmd_lock(mm, pmd);
- spin_lock(&gmap->guest_table_lock);
- if (*table == _SEGMENT_ENTRY_INVALID) {
- rc = radix_tree_insert(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT, table);
- if (!rc)
- *table = pmd_val(*pmd);
- } else
- rc = 0;
- spin_unlock(&gmap->guest_table_lock);
- spin_unlock(ptl);
- radix_tree_preload_end();
- return rc;
+ pgste_t pgste;
+
+ /* the mm_has_pgste() check is done in set_pte_at() */
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+ pgste_set_key(ptep, pgste, entry, mm);
+ pgste = pgste_set_pte(ptep, pgste, entry);
+ pgste_set_unlock(ptep, pgste);
}
-/**
- * gmap_fault - resolve a fault on a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @fault_flags: flags to pass down to handle_mm_fault()
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
- * if the vm address is already mapped to a different guest segment.
- */
-int gmap_fault(struct gmap *gmap, unsigned long gaddr,
- unsigned int fault_flags)
+void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- unsigned long vmaddr;
- int rc;
-
- down_read(&gmap->mm->mmap_sem);
- vmaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(vmaddr)) {
- rc = vmaddr;
- goto out_up;
- }
- if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
- rc = -EFAULT;
- goto out_up;
- }
- rc = __gmap_link(gmap, gaddr, vmaddr);
-out_up:
- up_read(&gmap->mm->mmap_sem);
- return rc;
+ pgste_t pgste;
+
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) |= PGSTE_IN_BIT;
+ pgste_set_unlock(ptep, pgste);
}
-EXPORT_SYMBOL_GPL(gmap_fault);
-static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
if (!non_swap_entry(entry))
dec_mm_counter(mm, MM_SWAPENTS);
else if (is_migration_entry(entry)) {
struct page *page = migration_entry_to_page(entry);
- if (PageAnon(page))
- dec_mm_counter(mm, MM_ANONPAGES);
- else
- dec_mm_counter(mm, MM_FILEPAGES);
+ dec_mm_counter(mm, mm_counter(page));
}
free_swap_and_cache(entry);
}
-/*
- * this function is assumed to be called with mmap_sem held
- */
-void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int reset)
{
- unsigned long vmaddr, ptev, pgstev;
- pte_t *ptep, pte;
- spinlock_t *ptl;
+ unsigned long pgstev;
pgste_t pgste;
+ pte_t pte;
- /* Find the vm address for the guest address */
- vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- if (!vmaddr)
- return;
- vmaddr |= gaddr & ~PMD_MASK;
- /* Get pointer to the page table entry */
- ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
- if (unlikely(!ptep))
- return;
- pte = *ptep;
- if (!pte_swap(pte))
- goto out_pte;
/* Zap unused and logically-zero pages */
pgste = pgste_get_lock(ptep);
pgstev = pgste_val(pgste);
- ptev = pte_val(pte);
- if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
- ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
- gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
- pte_clear(gmap->mm, vmaddr, ptep);
- }
+ pte = *ptep;
+ if (pte_swap(pte) &&
+ ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
+ (pgstev & _PGSTE_GPS_ZERO))) {
+ ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
+ pte_clear(mm, addr, ptep);
+ }
+ if (reset)
+ pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
pgste_set_unlock(ptep, pgste);
-out_pte:
- pte_unmap_unlock(ptep, ptl);
}
-EXPORT_SYMBOL_GPL(__gmap_zap);
-void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
+void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- unsigned long gaddr, vmaddr, size;
- struct vm_area_struct *vma;
-
- down_read(&gmap->mm->mmap_sem);
- for (gaddr = from; gaddr < to;
- gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
- /* Find the vm address for the guest address */
- vmaddr = (unsigned long)
- radix_tree_lookup(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- if (!vmaddr)
- continue;
- vmaddr |= gaddr & ~PMD_MASK;
- /* Find vma in the parent mm */
- vma = find_vma(gmap->mm, vmaddr);
- size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
- zap_page_range(vma, vmaddr, size, NULL);
- }
- up_read(&gmap->mm->mmap_sem);
-}
-EXPORT_SYMBOL_GPL(gmap_discard);
-
-static LIST_HEAD(gmap_notifier_list);
-static DEFINE_SPINLOCK(gmap_notifier_lock);
+ unsigned long ptev;
+ pgste_t pgste;
-/**
- * gmap_register_ipte_notifier - register a pte invalidation callback
- * @nb: pointer to the gmap notifier block
- */
-void gmap_register_ipte_notifier(struct gmap_notifier *nb)
-{
- spin_lock(&gmap_notifier_lock);
- list_add(&nb->list, &gmap_notifier_list);
- spin_unlock(&gmap_notifier_lock);
+ /* Clear storage key */
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
+ PGSTE_GR_BIT | PGSTE_GC_BIT);
+ ptev = pte_val(*ptep);
+ if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
+ page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
+ pgste_set_unlock(ptep, pgste);
}
-EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
-/**
- * gmap_unregister_ipte_notifier - remove a pte invalidation callback
- * @nb: pointer to the gmap notifier block
- */
-void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
-{
- spin_lock(&gmap_notifier_lock);
- list_del_init(&nb->list);
- spin_unlock(&gmap_notifier_lock);
-}
-EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
-
-/**
- * gmap_ipte_notify - mark a range of ptes for invalidation notification
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: virtual address in the guest address space
- * @len: size of area
- *
- * Returns 0 if for each page in the given range a gmap mapping exists and
- * the invalidation notification could be set. If the gmap mapping is missing
- * for one or more pages -EFAULT is returned. If no memory could be allocated
- * -ENOMEM is returned. This function establishes missing page table entries.
+/*
+ * Test and reset if a guest page is dirty
*/
-int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
+bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
- unsigned long addr;
spinlock_t *ptl;
- pte_t *ptep, entry;
pgste_t pgste;
- int rc = 0;
-
- if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
- return -EINVAL;
- down_read(&gmap->mm->mmap_sem);
- while (len) {
- /* Convert gmap address and connect the page tables */
- addr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(addr)) {
- rc = addr;
- break;
- }
- /* Get the page mapped */
- if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) {
- rc = -EFAULT;
- break;
- }
- rc = __gmap_link(gmap, gaddr, addr);
- if (rc)
- break;
- /* Walk the process page table, lock and get pte pointer */
- ptep = get_locked_pte(gmap->mm, addr, &ptl);
- VM_BUG_ON(!ptep);
- /* Set notification bit in the pgste of the pte */
- entry = *ptep;
- if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
- pgste = pgste_get_lock(ptep);
- pgste_val(pgste) |= PGSTE_IN_BIT;
- pgste_set_unlock(ptep, pgste);
- gaddr += PAGE_SIZE;
- len -= PAGE_SIZE;
- }
- pte_unmap_unlock(ptep, ptl);
- }
- up_read(&gmap->mm->mmap_sem);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_ipte_notify);
-
-/**
- * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
- * @mm: pointer to the process mm_struct
- * @addr: virtual address in the process address space
- * @pte: pointer to the page table entry
- *
- * This function is assumed to be called with the page table lock held
- * for the pte to notify.
- */
-void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
-{
- unsigned long offset, gaddr;
- unsigned long *table;
- struct gmap_notifier *nb;
- struct gmap *gmap;
-
- offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
- offset = offset * (4096 / sizeof(pte_t));
- spin_lock(&gmap_notifier_lock);
- list_for_each_entry(gmap, &mm->context.gmap_list, list) {
- table = radix_tree_lookup(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (!table)
- continue;
- gaddr = __gmap_segment_gaddr(table) + offset;
- list_for_each_entry(nb, &gmap_notifier_list, list)
- nb->notifier_call(gmap, gaddr);
+ pte_t *ptep;
+ pte_t pte;
+ bool dirty;
+
+ ptep = get_locked_pte(mm, addr, &ptl);
+ if (unlikely(!ptep))
+ return false;
+
+ pgste = pgste_get_lock(ptep);
+ dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+ pgste_val(pgste) &= ~PGSTE_UC_BIT;
+ pte = *ptep;
+ if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+ pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+ __ptep_ipte(addr, ptep);
+ if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+ pte_val(pte) |= _PAGE_PROTECT;
+ else
+ pte_val(pte) |= _PAGE_INVALID;
+ *ptep = pte;
}
- spin_unlock(&gmap_notifier_lock);
+ pgste_set_unlock(ptep, pgste);
+
+ spin_unlock(ptl);
+ return dirty;
}
-EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
+EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
- unsigned long key, bool nq)
+ unsigned char key, bool nq)
{
+ unsigned long keyul;
spinlock_t *ptl;
pgste_t old, new;
pte_t *ptep;
down_read(&mm->mmap_sem);
-retry:
ptep = get_locked_pte(mm, addr, &ptl);
if (unlikely(!ptep)) {
up_read(&mm->mmap_sem);
return -EFAULT;
}
- if (!(pte_val(*ptep) & _PAGE_INVALID) &&
- (pte_val(*ptep) & _PAGE_PROTECT)) {
- pte_unmap_unlock(ptep, ptl);
- if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
- up_read(&mm->mmap_sem);
- return -EFAULT;
- }
- goto retry;
- }
new = old = pgste_get_lock(ptep);
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
PGSTE_ACC_BITS | PGSTE_FP_BIT);
- pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
- pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+ keyul = (unsigned long) key;
+ pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
+ pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
unsigned long address, bits, skey;
@@ -841,13 +543,12 @@ retry:
}
EXPORT_SYMBOL(set_guest_storage_key);
-unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
{
+ unsigned char key;
spinlock_t *ptl;
pgste_t pgste;
pte_t *ptep;
- uint64_t physaddr;
- unsigned long key = 0;
down_read(&mm->mmap_sem);
ptep = get_locked_pte(mm, addr, &ptl);
@@ -858,13 +559,12 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
pgste = pgste_get_lock(ptep);
if (pte_val(*ptep) & _PAGE_INVALID) {
- key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
+ key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
} else {
- physaddr = pte_val(*ptep) & PAGE_MASK;
- key = page_get_storage_key(physaddr);
+ key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
/* Reflect guest's logical view, not physical */
if (pgste_val(pgste) & PGSTE_GR_BIT)
@@ -879,487 +579,4 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
return key;
}
EXPORT_SYMBOL(get_guest_storage_key);
-
-static int page_table_allocate_pgste_min = 0;
-static int page_table_allocate_pgste_max = 1;
-int page_table_allocate_pgste = 0;
-EXPORT_SYMBOL(page_table_allocate_pgste);
-
-static struct ctl_table page_table_sysctl[] = {
- {
- .procname = "allocate_pgste",
- .data = &page_table_allocate_pgste,
- .maxlen = sizeof(int),
- .mode = S_IRUGO | S_IWUSR,
- .proc_handler = proc_dointvec,
- .extra1 = &page_table_allocate_pgste_min,
- .extra2 = &page_table_allocate_pgste_max,
- },
- { }
-};
-
-static struct ctl_table page_table_sysctl_dir[] = {
- {
- .procname = "vm",
- .maxlen = 0,
- .mode = 0555,
- .child = page_table_sysctl,
- },
- { }
-};
-
-static int __init page_table_register_sysctl(void)
-{
- return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
-}
-__initcall(page_table_register_sysctl);
-
-#else /* CONFIG_PGSTE */
-
-static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
- unsigned long vmaddr)
-{
-}
-
-#endif /* CONFIG_PGSTE */
-
-static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
-{
- unsigned int old, new;
-
- do {
- old = atomic_read(v);
- new = old ^ bits;
- } while (atomic_cmpxchg(v, old, new) != old);
- return new;
-}
-
-/*
- * page table entry allocation/free routines.
- */
-unsigned long *page_table_alloc(struct mm_struct *mm)
-{
- unsigned long *table;
- struct page *page;
- unsigned int mask, bit;
-
- /* Try to get a fragment of a 4K page as a 2K page table */
- if (!mm_alloc_pgste(mm)) {
- table = NULL;
- spin_lock_bh(&mm->context.list_lock);
- if (!list_empty(&mm->context.pgtable_list)) {
- page = list_first_entry(&mm->context.pgtable_list,
- struct page, lru);
- mask = atomic_read(&page->_mapcount);
- mask = (mask | (mask >> 4)) & 3;
- if (mask != 3) {
- table = (unsigned long *) page_to_phys(page);
- bit = mask & 1; /* =1 -> second 2K */
- if (bit)
- table += PTRS_PER_PTE;
- atomic_xor_bits(&page->_mapcount, 1U << bit);
- list_del(&page->lru);
- }
- }
- spin_unlock_bh(&mm->context.list_lock);
- if (table)
- return table;
- }
- /* Allocate a fresh page */
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
- if (!page)
- return NULL;
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- /* Initialize page table */
- table = (unsigned long *) page_to_phys(page);
- if (mm_alloc_pgste(mm)) {
- /* Return 4K page table with PGSTEs */
- atomic_set(&page->_mapcount, 3);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
- clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
- } else {
- /* Return the first 2K fragment of the page */
- atomic_set(&page->_mapcount, 1);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE);
- spin_lock_bh(&mm->context.list_lock);
- list_add(&page->lru, &mm->context.pgtable_list);
- spin_unlock_bh(&mm->context.list_lock);
- }
- return table;
-}
-
-void page_table_free(struct mm_struct *mm, unsigned long *table)
-{
- struct page *page;
- unsigned int bit, mask;
-
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (!mm_alloc_pgste(mm)) {
- /* Free 2K page table fragment of a 4K page */
- bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
- spin_lock_bh(&mm->context.list_lock);
- mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
- if (mask & 3)
- list_add(&page->lru, &mm->context.pgtable_list);
- else
- list_del(&page->lru);
- spin_unlock_bh(&mm->context.list_lock);
- if (mask != 0)
- return;
- }
-
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
-}
-
-void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
- unsigned long vmaddr)
-{
- struct mm_struct *mm;
- struct page *page;
- unsigned int bit, mask;
-
- mm = tlb->mm;
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (mm_alloc_pgste(mm)) {
- gmap_unlink(mm, table, vmaddr);
- table = (unsigned long *) (__pa(table) | 3);
- tlb_remove_table(tlb, table);
- return;
- }
- bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
- spin_lock_bh(&mm->context.list_lock);
- mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
- if (mask & 3)
- list_add_tail(&page->lru, &mm->context.pgtable_list);
- else
- list_del(&page->lru);
- spin_unlock_bh(&mm->context.list_lock);
- table = (unsigned long *) (__pa(table) | (1U << bit));
- tlb_remove_table(tlb, table);
-}
-
-static void __tlb_remove_table(void *_table)
-{
- unsigned int mask = (unsigned long) _table & 3;
- void *table = (void *)((unsigned long) _table ^ mask);
- struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-
- switch (mask) {
- case 0: /* pmd or pud */
- free_pages((unsigned long) table, 2);
- break;
- case 1: /* lower 2K of a 4K page table */
- case 2: /* higher 2K of a 4K page table */
- if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
- break;
- /* fallthrough */
- case 3: /* 4K page table with pgstes */
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
- break;
- }
-}
-
-static void tlb_remove_table_smp_sync(void *arg)
-{
- /* Simply deliver the interrupt */
-}
-
-static void tlb_remove_table_one(void *table)
-{
- /*
- * This isn't an RCU grace period and hence the page-tables cannot be
- * assumed to be actually RCU-freed.
- *
- * It is however sufficient for software page-table walkers that rely
- * on IRQ disabling. See the comment near struct mmu_table_batch.
- */
- smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
- __tlb_remove_table(table);
-}
-
-static void tlb_remove_table_rcu(struct rcu_head *head)
-{
- struct mmu_table_batch *batch;
- int i;
-
- batch = container_of(head, struct mmu_table_batch, rcu);
-
- for (i = 0; i < batch->nr; i++)
- __tlb_remove_table(batch->tables[i]);
-
- free_page((unsigned long)batch);
-}
-
-void tlb_table_flush(struct mmu_gather *tlb)
-{
- struct mmu_table_batch **batch = &tlb->batch;
-
- if (*batch) {
- call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
- *batch = NULL;
- }
-}
-
-void tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- struct mmu_table_batch **batch = &tlb->batch;
-
- tlb->mm->context.flush_mm = 1;
- if (*batch == NULL) {
- *batch = (struct mmu_table_batch *)
- __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
- if (*batch == NULL) {
- __tlb_flush_mm_lazy(tlb->mm);
- tlb_remove_table_one(table);
- return;
- }
- (*batch)->nr = 0;
- }
- (*batch)->tables[(*batch)->nr++] = table;
- if ((*batch)->nr == MAX_TABLE_BATCH)
- tlb_flush_mmu(tlb);
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline void thp_split_vma(struct vm_area_struct *vma)
-{
- unsigned long addr;
-
- for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
- follow_page(vma, addr, FOLL_SPLIT);
-}
-
-static inline void thp_split_mm(struct mm_struct *mm)
-{
- struct vm_area_struct *vma;
-
- for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
- thp_split_vma(vma);
- vma->vm_flags &= ~VM_HUGEPAGE;
- vma->vm_flags |= VM_NOHUGEPAGE;
- }
- mm->def_flags |= VM_NOHUGEPAGE;
-}
-#else
-static inline void thp_split_mm(struct mm_struct *mm)
-{
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-/*
- * switch on pgstes for its userspace process (for kvm)
- */
-int s390_enable_sie(void)
-{
- struct mm_struct *mm = current->mm;
-
- /* Do we have pgstes? if yes, we are done */
- if (mm_has_pgste(mm))
- return 0;
- /* Fail if the page tables are 2K */
- if (!mm_alloc_pgste(mm))
- return -EINVAL;
- down_write(&mm->mmap_sem);
- mm->context.has_pgste = 1;
- /* split thp mappings and disable thp for future mappings */
- thp_split_mm(mm);
- up_write(&mm->mmap_sem);
- return 0;
-}
-EXPORT_SYMBOL_GPL(s390_enable_sie);
-
-/*
- * Enable storage key handling from now on and initialize the storage
- * keys with the default key.
- */
-static int __s390_enable_skey(pte_t *pte, unsigned long addr,
- unsigned long next, struct mm_walk *walk)
-{
- unsigned long ptev;
- pgste_t pgste;
-
- pgste = pgste_get_lock(pte);
- /*
- * Remove all zero page mappings,
- * after establishing a policy to forbid zero page mappings
- * following faults for that page will get fresh anonymous pages
- */
- if (is_zero_pfn(pte_pfn(*pte))) {
- ptep_flush_direct(walk->mm, addr, pte);
- pte_val(*pte) = _PAGE_INVALID;
- }
- /* Clear storage key */
- pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
- PGSTE_GR_BIT | PGSTE_GC_BIT);
- ptev = pte_val(*pte);
- if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
- page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
- pgste_set_unlock(pte, pgste);
- return 0;
-}
-
-int s390_enable_skey(void)
-{
- struct mm_walk walk = { .pte_entry = __s390_enable_skey };
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- int rc = 0;
-
- down_write(&mm->mmap_sem);
- if (mm_use_skey(mm))
- goto out_up;
-
- mm->context.use_skey = 1;
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
- MADV_UNMERGEABLE, &vma->vm_flags)) {
- mm->context.use_skey = 0;
- rc = -ENOMEM;
- goto out_up;
- }
- }
- mm->def_flags &= ~VM_MERGEABLE;
-
- walk.mm = mm;
- walk_page_range(0, TASK_SIZE, &walk);
-
-out_up:
- up_write(&mm->mmap_sem);
- return rc;
-}
-EXPORT_SYMBOL_GPL(s390_enable_skey);
-
-/*
- * Reset CMMA state, make all pages stable again.
- */
-static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
- unsigned long next, struct mm_walk *walk)
-{
- pgste_t pgste;
-
- pgste = pgste_get_lock(pte);
- pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
- pgste_set_unlock(pte, pgste);
- return 0;
-}
-
-void s390_reset_cmma(struct mm_struct *mm)
-{
- struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
-
- down_write(&mm->mmap_sem);
- walk.mm = mm;
- walk_page_range(0, TASK_SIZE, &walk);
- up_write(&mm->mmap_sem);
-}
-EXPORT_SYMBOL_GPL(s390_reset_cmma);
-
-/*
- * Test and reset if a guest page is dirty
- */
-bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
-{
- pte_t *pte;
- spinlock_t *ptl;
- bool dirty = false;
-
- pte = get_locked_pte(gmap->mm, address, &ptl);
- if (unlikely(!pte))
- return false;
-
- if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
- dirty = true;
-
- spin_unlock(ptl);
- return dirty;
-}
-EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp)
-{
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- /* No need to flush TLB
- * On s390 reference bits are in storage key and never in TLB */
- return pmdp_test_and_clear_young(vma, address, pmdp);
-}
-
-int pmdp_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp,
- pmd_t entry, int dirty)
-{
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-
- entry = pmd_mkyoung(entry);
- if (dirty)
- entry = pmd_mkdirty(entry);
- if (pmd_same(*pmdp, entry))
- return 0;
- pmdp_invalidate(vma, address, pmdp);
- set_pmd_at(vma->vm_mm, address, pmdp, entry);
- return 1;
-}
-
-static void pmdp_splitting_flush_sync(void *arg)
-{
- /* Simply deliver the interrupt */
-}
-
-void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp)
-{
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
- (unsigned long *) pmdp)) {
- /* need to serialize against gup-fast (IRQ disabled) */
- smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
- }
-}
-
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable)
-{
- struct list_head *lh = (struct list_head *) pgtable;
-
- assert_spin_locked(pmd_lockptr(mm, pmdp));
-
- /* FIFO */
- if (!pmd_huge_pte(mm, pmdp))
- INIT_LIST_HEAD(lh);
- else
- list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
- pmd_huge_pte(mm, pmdp) = pgtable;
-}
-
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
-{
- struct list_head *lh;
- pgtable_t pgtable;
- pte_t *ptep;
-
- assert_spin_locked(pmd_lockptr(mm, pmdp));
-
- /* FIFO */
- pgtable = pmd_huge_pte(mm, pmdp);
- lh = (struct list_head *) pgtable;
- if (list_empty(lh))
- pmd_huge_pte(mm, pmdp) = NULL;
- else {
- pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
- list_del(lh);
- }
- ptep = (pte_t *) pgtable;
- pte_val(*ptep) = _PAGE_INVALID;
- ptep++;
- pte_val(*ptep) = _PAGE_INVALID;
- return pgtable;
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index ef7d6c8fea66..d27fccbad7c1 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -94,16 +94,15 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
pgd_populate(&init_mm, pg_dir, pu_dir);
}
pu_dir = pud_offset(pg_dir, address);
-#ifndef CONFIG_DEBUG_PAGEALLOC
if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
- !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
+ !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
+ !debug_pagealloc_enabled()) {
pud_val(*pu_dir) = __pa(address) |
_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
(ro ? _REGION_ENTRY_PROTECT : 0);
address += PUD_SIZE;
continue;
}
-#endif
if (pud_none(*pu_dir)) {
pm_dir = vmem_pmd_alloc();
if (!pm_dir)
@@ -111,9 +110,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
pud_populate(&init_mm, pu_dir, pm_dir);
}
pm_dir = pmd_offset(pu_dir, address);
-#ifndef CONFIG_DEBUG_PAGEALLOC
if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
- !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
+ !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
+ !debug_pagealloc_enabled()) {
pmd_val(*pm_dir) = __pa(address) |
_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
_SEGMENT_ENTRY_YOUNG |
@@ -121,7 +120,6 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
address += PMD_SIZE;
continue;
}
-#endif
if (pmd_none(*pm_dir)) {
pt_dir = vmem_pte_alloc(address);
if (!pt_dir)
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 9a0c4c22e536..3c0bfc1f2694 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -408,7 +408,7 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit)
* Save registers and create stack frame if necessary.
* See stack frame layout desription in "bpf_jit.h"!
*/
-static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic)
+static void bpf_jit_prologue(struct bpf_jit *jit)
{
if (jit->seen & SEEN_TAIL_CALL) {
/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
@@ -448,15 +448,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic)
/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15,
STK_OFF_SKBP);
- /* Clear A (%b0) and X (%b7) registers for converted BPF programs */
- if (is_classic) {
- if (REG_SEEN(BPF_REG_A))
- /* lghi %ba,0 */
- EMIT4_IMM(0xa7090000, BPF_REG_A, 0);
- if (REG_SEEN(BPF_REG_X))
- /* lghi %bx,0 */
- EMIT4_IMM(0xa7090000, BPF_REG_X, 0);
- }
}
/*
@@ -1245,7 +1236,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
jit->lit = jit->lit_start;
jit->prg = 0;
- bpf_jit_prologue(jit, bpf_prog_was_classic(fp));
+ bpf_jit_prologue(jit);
for (i = 0; i < fp->len; i += insn_count) {
insn_count = bpf_jit_insn(jit, fp, i);
if (insn_count < 0)
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index 43f32ce60aa3..2794845061c6 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -57,9 +57,7 @@ static __init pg_data_t *alloc_node_data(void)
{
pg_data_t *res;
- res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
- if (!res)
- panic("Could not allocate memory for node data!\n");
+ res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8);
memset(res, 0, sizeof(pg_data_t));
return res;
}
@@ -162,7 +160,7 @@ static int __init numa_init_late(void)
register_one_node(nid);
return 0;
}
-device_initcall(numa_init_late);
+arch_initcall(numa_init_late);
static int __init parse_debug(char *parm)
{
diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile
index 1bd23017191e..496e4a7ee00e 100644
--- a/arch/s390/oprofile/Makefile
+++ b/arch/s390/oprofile/Makefile
@@ -6,5 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprofilefs.o oprofile_stats.o \
timer_int.o )
-oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-y := $(DRIVER_OBJS) init.o
oprofile-y += hwsampler.o
diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c
deleted file mode 100644
index 8a6811b2cdb9..000000000000
--- a/arch/s390/oprofile/backtrace.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * S390 Version
- * Copyright IBM Corp. 2005
- * Author(s): Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
- */
-
-#include <linux/oprofile.h>
-
-#include <asm/processor.h> /* for struct stack_frame */
-
-static unsigned long
-__show_trace(unsigned int *depth, unsigned long sp,
- unsigned long low, unsigned long high)
-{
- struct stack_frame *sf;
- struct pt_regs *regs;
-
- while (*depth) {
- sp = sp & PSW_ADDR_INSN;
- if (sp < low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- (*depth)--;
- oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
-
- /* Follow the backchain. */
- while (*depth) {
- low = sp;
- sp = sf->back_chain & PSW_ADDR_INSN;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- (*depth)--;
- oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
-
- }
-
- if (*depth == 0)
- break;
-
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long) (sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *) sp;
- (*depth)--;
- oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
- low = sp;
- sp = regs->gprs[15];
- }
- return sp;
-}
-
-void s390_backtrace(struct pt_regs * const regs, unsigned int depth)
-{
- unsigned long head;
- struct stack_frame* head_sf;
-
- if (user_mode(regs))
- return;
-
- head = regs->gprs[15];
- head_sf = (struct stack_frame*)head;
-
- if (!head_sf->back_chain)
- return;
-
- head = head_sf->back_chain;
-
- head = __show_trace(&depth, head, S390_lowcore.async_stack - ASYNC_SIZE,
- S390_lowcore.async_stack);
-
- __show_trace(&depth, head, S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE);
-}
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 9cfa2ffaa9d6..791935a65800 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -20,8 +20,6 @@
#include "../../../drivers/oprofile/oprof.h"
-extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
-
#include "hwsampler.h"
#include "op_counter.h"
@@ -456,6 +454,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
+ case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break;
default: return -ENODEV;
}
}
@@ -494,6 +493,24 @@ static void oprofile_hwsampler_exit(void)
hwsampler_shutdown();
}
+static int __s390_backtrace(void *data, unsigned long address)
+{
+ unsigned int *depth = data;
+
+ if (*depth == 0)
+ return 1;
+ (*depth)--;
+ oprofile_add_trace(address);
+ return 0;
+}
+
+static void s390_backtrace(struct pt_regs *regs, unsigned int depth)
+{
+ if (user_mode(regs))
+ return;
+ dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]);
+}
+
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
ops->backtrace = s390_backtrace;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 7ef12a3ace3a..871af75c69c2 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -68,9 +68,12 @@ static struct airq_struct zpci_airq = {
.isc = PCI_ISC,
};
-/* I/O Map */
+#define ZPCI_IOMAP_ENTRIES \
+ min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT), \
+ ZPCI_IOMAP_MAX_ENTRIES)
+
static DEFINE_SPINLOCK(zpci_iomap_lock);
-static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
+static unsigned long *zpci_iomap_bitmap;
struct zpci_iomap_entry *zpci_iomap_start;
EXPORT_SYMBOL_GPL(zpci_iomap_start);
@@ -265,27 +268,20 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
unsigned long max)
{
struct zpci_dev *zdev = to_zpci(pdev);
- u64 addr;
int idx;
- if ((bar & 7) != bar)
+ if (!pci_resource_len(pdev, bar))
return NULL;
idx = zdev->bars[bar].map_idx;
spin_lock(&zpci_iomap_lock);
- if (zpci_iomap_start[idx].count++) {
- BUG_ON(zpci_iomap_start[idx].fh != zdev->fh ||
- zpci_iomap_start[idx].bar != bar);
- } else {
- zpci_iomap_start[idx].fh = zdev->fh;
- zpci_iomap_start[idx].bar = bar;
- }
/* Detect overrun */
- BUG_ON(!zpci_iomap_start[idx].count);
+ WARN_ON(!++zpci_iomap_start[idx].count);
+ zpci_iomap_start[idx].fh = zdev->fh;
+ zpci_iomap_start[idx].bar = bar;
spin_unlock(&zpci_iomap_lock);
- addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
- return (void __iomem *) addr + offset;
+ return (void __iomem *) ZPCI_ADDR(idx) + offset;
}
EXPORT_SYMBOL(pci_iomap_range);
@@ -297,12 +293,11 @@ EXPORT_SYMBOL(pci_iomap);
void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
{
- unsigned int idx;
+ unsigned int idx = ZPCI_IDX(addr);
- idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48;
spin_lock(&zpci_iomap_lock);
/* Detect underrun */
- BUG_ON(!zpci_iomap_start[idx].count);
+ WARN_ON(!zpci_iomap_start[idx].count);
if (!--zpci_iomap_start[idx].count) {
zpci_iomap_start[idx].fh = 0;
zpci_iomap_start[idx].bar = 0;
@@ -544,15 +539,15 @@ static void zpci_irq_exit(void)
static int zpci_alloc_iomap(struct zpci_dev *zdev)
{
- int entry;
+ unsigned long entry;
spin_lock(&zpci_iomap_lock);
- entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
- if (entry == ZPCI_IOMAP_MAX_ENTRIES) {
+ entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES);
+ if (entry == ZPCI_IOMAP_ENTRIES) {
spin_unlock(&zpci_iomap_lock);
return -ENOSPC;
}
- set_bit(entry, zpci_iomap);
+ set_bit(entry, zpci_iomap_bitmap);
spin_unlock(&zpci_iomap_lock);
return entry;
}
@@ -561,7 +556,7 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
{
spin_lock(&zpci_iomap_lock);
memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
- clear_bit(entry, zpci_iomap);
+ clear_bit(entry, zpci_iomap_bitmap);
spin_unlock(&zpci_iomap_lock);
}
@@ -611,8 +606,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
if (zdev->bars[i].val & 4)
flags |= IORESOURCE_MEM_64;
- addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48);
-
+ addr = ZPCI_ADDR(entry);
size = 1UL << zdev->bars[i].size;
res = __alloc_res(zdev, addr, size, flags);
@@ -643,12 +637,11 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
int pcibios_add_device(struct pci_dev *pdev)
{
- struct zpci_dev *zdev = to_zpci(pdev);
struct resource *res;
int i;
- zdev->pdev = pdev;
pdev->dev.groups = zpci_attr_groups;
+ pdev->dev.archdata.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
for (i = 0; i < PCI_BAR_COUNT; i++) {
@@ -670,8 +663,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask)
{
struct zpci_dev *zdev = to_zpci(pdev);
- zdev->pdev = pdev;
- zpci_debug_init_device(zdev);
+ zpci_debug_init_device(zdev, dev_name(&pdev->dev));
zpci_fmb_enable_device(zdev);
return pci_enable_resources(pdev, mask);
@@ -683,7 +675,6 @@ void pcibios_disable_device(struct pci_dev *pdev)
zpci_fmb_disable_device(zdev);
zpci_debug_exit_device(zdev);
- zdev->pdev = NULL;
}
#ifdef CONFIG_HIBERNATE_CALLBACKS
@@ -701,8 +692,7 @@ static int zpci_restore(struct device *dev)
goto out;
zpci_map_resources(pdev);
- zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
- zdev->start_dma + zdev->iommu_size - 1,
+ zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
(u64) zdev->dma_table);
out:
@@ -871,26 +861,36 @@ static inline int barsize(u8 size)
static int zpci_mem_init(void)
{
+ BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
+ __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb));
+
zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
- 16, 0, NULL);
+ __alignof__(struct zpci_fmb), 0, NULL);
if (!zdev_fmb_cache)
- goto error_zdev;
+ goto error_fmb;
- /* TODO: use realloc */
- zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
- GFP_KERNEL);
+ zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES,
+ sizeof(*zpci_iomap_start), GFP_KERNEL);
if (!zpci_iomap_start)
goto error_iomap;
- return 0;
+ zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES),
+ sizeof(*zpci_iomap_bitmap), GFP_KERNEL);
+ if (!zpci_iomap_bitmap)
+ goto error_iomap_bitmap;
+
+ return 0;
+error_iomap_bitmap:
+ kfree(zpci_iomap_start);
error_iomap:
kmem_cache_destroy(zdev_fmb_cache);
-error_zdev:
+error_fmb:
return -ENOMEM;
}
static void zpci_mem_exit(void)
{
+ kfree(zpci_iomap_bitmap);
kfree(zpci_iomap_start);
kmem_cache_destroy(zdev_fmb_cache);
}
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index d6e411ed8b1f..1a4512c8544a 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -8,13 +8,19 @@
#define KMSG_COMPONENT "zpci"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/compat.h>
#include <linux/kernel.h>
+#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/delay.h>
#include <linux/pci.h>
+#include <linux/uaccess.h>
#include <asm/pci_debug.h>
#include <asm/pci_clp.h>
+#include <asm/compat.h>
+#include <asm/clp.h>
+#include <uapi/asm/clp.h>
static inline void zpci_err_clp(unsigned int rsp, int rc)
{
@@ -27,21 +33,43 @@ static inline void zpci_err_clp(unsigned int rsp, int rc)
}
/*
- * Call Logical Processor
- * Retry logic is handled by the caller.
+ * Call Logical Processor with c=1, lps=0 and command 1
+ * to get the bit mask of installed logical processors
*/
-static inline u8 clp_instr(void *data)
+static inline int clp_get_ilp(unsigned long *ilp)
+{
+ unsigned long mask;
+ int cc = 3;
+
+ asm volatile (
+ " .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1)
+ : "cc");
+ *ilp = mask;
+ return cc;
+}
+
+/*
+ * Call Logical Processor with c=0, the give constant lps and an lpcb request.
+ */
+static inline int clp_req(void *data, unsigned int lps)
{
struct { u8 _[CLP_BLK_SIZE]; } *req = data;
u64 ignored;
- u8 cc;
+ int cc = 3;
asm volatile (
- " .insn rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n"
- " ipm %[cc]\n"
+ " .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n"
+ "0: ipm %[cc]\n"
" srl %[cc],28\n"
- : [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req)
- : [req] "a" (req)
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req)
+ : [req] "a" (req), [lps] "i" (lps)
: "cc");
return cc;
}
@@ -90,7 +118,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
rrb->response.hdr.len = sizeof(rrb->response);
rrb->request.pfgid = pfgid;
- rc = clp_instr(rrb);
+ rc = clp_req(rrb, CLP_LPS_PCI);
if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
clp_store_query_pci_fngrp(zdev, &rrb->response);
else {
@@ -143,13 +171,12 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
rrb->response.hdr.len = sizeof(rrb->response);
rrb->request.fh = fh;
- rc = clp_instr(rrb);
+ rc = clp_req(rrb, CLP_LPS_PCI);
if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
rc = clp_store_query_pci_fn(zdev, &rrb->response);
if (rc)
goto out;
- if (rrb->response.pfgid)
- rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
+ rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
} else {
zpci_err("Q PCI FN:\n");
zpci_err_clp(rrb->response.hdr.rsp, rc);
@@ -214,7 +241,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
rrb->request.oc = command;
rrb->request.ndas = nr_dma_as;
- rc = clp_instr(rrb);
+ rc = clp_req(rrb, CLP_LPS_PCI);
if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
retries--;
if (retries < 0)
@@ -280,7 +307,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
rrb->request.resume_token = resume_token;
/* Get PCI function handle list */
- rc = clp_instr(rrb);
+ rc = clp_req(rrb, CLP_LPS_PCI);
if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
zpci_err("List PCI FN:\n");
zpci_err_clp(rrb->response.hdr.rsp, rc);
@@ -391,3 +418,198 @@ int clp_rescan_pci_devices_simple(void)
clp_free_block(rrb);
return rc;
}
+
+static int clp_base_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_BASE) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb)
+{
+ switch (lpcb->cmd) {
+ case 0x0001: /* store logical-processor characteristics */
+ return clp_base_slpc(req, (void *) lpcb);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_list(struct clp_req *req, struct clp_req_rsp_list_pci *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ if (lpcb->request.reserved2 != 0)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_query(struct clp_req *req,
+ struct clp_req_rsp_query_pci *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_query_grp(struct clp_req *req,
+ struct clp_req_rsp_query_pci_grp *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0 ||
+ lpcb->request.reserved4 != 0)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_command(struct clp_req *req, struct clp_req_hdr *lpcb)
+{
+ switch (lpcb->cmd) {
+ case 0x0001: /* store logical-processor characteristics */
+ return clp_pci_slpc(req, (void *) lpcb);
+ case 0x0002: /* list PCI functions */
+ return clp_pci_list(req, (void *) lpcb);
+ case 0x0003: /* query PCI function */
+ return clp_pci_query(req, (void *) lpcb);
+ case 0x0004: /* query PCI function group */
+ return clp_pci_query_grp(req, (void *) lpcb);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int clp_normal_command(struct clp_req *req)
+{
+ struct clp_req_hdr *lpcb;
+ void __user *uptr;
+ int rc;
+
+ rc = -EINVAL;
+ if (req->lps != 0 && req->lps != 2)
+ goto out;
+
+ rc = -ENOMEM;
+ lpcb = clp_alloc_block(GFP_KERNEL);
+ if (!lpcb)
+ goto out;
+
+ rc = -EFAULT;
+ uptr = (void __force __user *)(unsigned long) req->data_p;
+ if (copy_from_user(lpcb, uptr, PAGE_SIZE) != 0)
+ goto out_free;
+
+ rc = -EINVAL;
+ if (lpcb->fmt != 0 || lpcb->reserved1 != 0 || lpcb->reserved2 != 0)
+ goto out_free;
+
+ switch (req->lps) {
+ case 0:
+ rc = clp_base_command(req, lpcb);
+ break;
+ case 2:
+ rc = clp_pci_command(req, lpcb);
+ break;
+ }
+ if (rc)
+ goto out_free;
+
+ rc = -EFAULT;
+ if (copy_to_user(uptr, lpcb, PAGE_SIZE) != 0)
+ goto out_free;
+
+ rc = 0;
+
+out_free:
+ clp_free_block(lpcb);
+out:
+ return rc;
+}
+
+static int clp_immediate_command(struct clp_req *req)
+{
+ void __user *uptr;
+ unsigned long ilp;
+ int exists;
+
+ if (req->cmd > 1 || clp_get_ilp(&ilp) != 0)
+ return -EINVAL;
+
+ uptr = (void __force __user *)(unsigned long) req->data_p;
+ if (req->cmd == 0) {
+ /* Command code 0: test for a specific processor */
+ exists = test_bit_inv(req->lps, &ilp);
+ return put_user(exists, (int __user *) uptr);
+ }
+ /* Command code 1: return bit mask of installed processors */
+ return put_user(ilp, (unsigned long __user *) uptr);
+}
+
+static long clp_misc_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct clp_req req;
+ void __user *argp;
+
+ if (cmd != CLP_SYNC)
+ return -EINVAL;
+
+ argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg;
+ if (copy_from_user(&req, argp, sizeof(req)))
+ return -EFAULT;
+ if (req.r != 0)
+ return -EINVAL;
+ return req.c ? clp_immediate_command(&req) : clp_normal_command(&req);
+}
+
+static int clp_misc_release(struct inode *inode, struct file *filp)
+{
+ return 0;
+}
+
+static const struct file_operations clp_misc_fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .release = clp_misc_release,
+ .unlocked_ioctl = clp_misc_ioctl,
+ .compat_ioctl = clp_misc_ioctl,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice clp_misc_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "clp",
+ .fops = &clp_misc_fops,
+};
+
+static int __init clp_misc_init(void)
+{
+ return misc_register(&clp_misc_device);
+}
+
+device_initcall(clp_misc_init);
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 4129b0a5fd78..c555de3d12d6 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -128,10 +128,9 @@ static const struct file_operations debugfs_pci_perf_fops = {
.release = single_release,
};
-void zpci_debug_init_device(struct zpci_dev *zdev)
+void zpci_debug_init_device(struct zpci_dev *zdev, const char *name)
{
- zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev),
- debugfs_root);
+ zdev->debugfs_dev = debugfs_create_dir(name, debugfs_root);
if (IS_ERR(zdev->debugfs_dev))
zdev->debugfs_dev = NULL;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 37d10f74425a..e595e89eac65 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -33,7 +33,7 @@ unsigned long *dma_alloc_cpu_table(void)
return NULL;
for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
- *entry = ZPCI_TABLE_INVALID | ZPCI_TABLE_PROTECTED;
+ *entry = ZPCI_TABLE_INVALID;
return table;
}
@@ -51,7 +51,7 @@ static unsigned long *dma_alloc_page_table(void)
return NULL;
for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
- *entry = ZPCI_PTE_INVALID | ZPCI_TABLE_PROTECTED;
+ *entry = ZPCI_PTE_INVALID;
return table;
}
@@ -95,7 +95,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *entry)
return pto;
}
-static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
unsigned long *sto, *pto;
unsigned int rtx, sx, px;
@@ -114,20 +114,10 @@ static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr
return &pto[px];
}
-void dma_update_cpu_trans(unsigned long *dma_table, void *page_addr,
- dma_addr_t dma_addr, int flags)
+void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
- unsigned long *entry;
-
- entry = dma_walk_cpu_trans(dma_table, dma_addr);
- if (!entry) {
- WARN_ON_ONCE(1);
- return;
- }
-
if (flags & ZPCI_PTE_INVALID) {
invalidate_pt_entry(entry);
- return;
} else {
set_pt_pfaa(entry, page_addr);
validate_pt_entry(entry);
@@ -146,18 +136,25 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
u8 *page_addr = (u8 *) (pa & PAGE_MASK);
dma_addr_t start_dma_addr = dma_addr;
unsigned long irq_flags;
+ unsigned long *entry;
int i, rc = 0;
if (!nr_pages)
return -EINVAL;
spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
- if (!zdev->dma_table)
+ if (!zdev->dma_table) {
+ rc = -EINVAL;
goto no_refresh;
+ }
for (i = 0; i < nr_pages; i++) {
- dma_update_cpu_trans(zdev->dma_table, page_addr, dma_addr,
- flags);
+ entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+ if (!entry) {
+ rc = -ENOMEM;
+ goto undo_cpu_trans;
+ }
+ dma_update_cpu_trans(entry, page_addr, flags);
page_addr += PAGE_SIZE;
dma_addr += PAGE_SIZE;
}
@@ -176,6 +173,18 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
nr_pages * PAGE_SIZE);
+undo_cpu_trans:
+ if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
+ flags = ZPCI_PTE_INVALID;
+ while (i-- > 0) {
+ page_addr -= PAGE_SIZE;
+ dma_addr -= PAGE_SIZE;
+ entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+ if (!entry)
+ break;
+ dma_update_cpu_trans(entry, page_addr, flags);
+ }
+ }
no_refresh:
spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
@@ -208,27 +217,29 @@ void dma_cleanup_tables(unsigned long *table)
dma_free_cpu_table(table);
}
-static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
+static unsigned long __dma_alloc_iommu(struct device *dev,
unsigned long start, int size)
{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long boundary_size;
- boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1,
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
PAGE_SIZE) >> PAGE_SHIFT;
return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
start, size, 0, boundary_size, 0);
}
-static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
+static unsigned long dma_alloc_iommu(struct device *dev, int size)
{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long offset, flags;
int wrap = 0;
spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);
+ offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
if (offset == -1) {
/* wrap-around */
- offset = __dma_alloc_iommu(zdev, 0, size);
+ offset = __dma_alloc_iommu(dev, 0, size);
wrap = 1;
}
@@ -242,8 +253,9 @@ static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
return offset;
}
-static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size)
+static void dma_free_iommu(struct device *dev, unsigned long offset, int size)
{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long flags;
spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
@@ -260,6 +272,16 @@ out:
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}
+static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
+{
+ struct {
+ unsigned long rc;
+ unsigned long addr;
+ } __packed data = {rc, addr};
+
+ zpci_err_hex(&data, sizeof(data));
+}
+
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction direction,
@@ -270,33 +292,40 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
unsigned long pa = page_to_phys(page) + offset;
int flags = ZPCI_PTE_VALID;
dma_addr_t dma_addr;
+ int ret;
/* This rounds up number of pages based on size and offset */
nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
- iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
- if (iommu_page_index == -1)
+ iommu_page_index = dma_alloc_iommu(dev, nr_pages);
+ if (iommu_page_index == -1) {
+ ret = -ENOSPC;
goto out_err;
+ }
/* Use rounded up size */
size = nr_pages * PAGE_SIZE;
dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
- if (dma_addr + size > zdev->end_dma)
+ if (dma_addr + size > zdev->end_dma) {
+ ret = -ERANGE;
goto out_free;
+ }
if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
flags |= ZPCI_TABLE_PROTECTED;
- if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
- atomic64_add(nr_pages, &zdev->mapped_pages);
- return dma_addr + (offset & ~PAGE_MASK);
- }
+ ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
+ if (ret)
+ goto out_free;
+
+ atomic64_add(nr_pages, &zdev->mapped_pages);
+ return dma_addr + (offset & ~PAGE_MASK);
out_free:
- dma_free_iommu(zdev, iommu_page_index, nr_pages);
+ dma_free_iommu(dev, iommu_page_index, nr_pages);
out_err:
zpci_err("map error:\n");
- zpci_err_hex(&pa, sizeof(pa));
+ zpci_err_dma(ret, pa);
return DMA_ERROR_CODE;
}
@@ -306,19 +335,21 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
{
struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long iommu_page_index;
- int npages;
+ int npages, ret;
npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
dma_addr = dma_addr & PAGE_MASK;
- if (dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
- ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID)) {
+ ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
+ ZPCI_PTE_INVALID);
+ if (ret) {
zpci_err("unmap error:\n");
- zpci_err_hex(&dma_addr, sizeof(dma_addr));
+ zpci_err_dma(ret, dma_addr);
+ return;
}
atomic64_add(npages, &zdev->unmapped_pages);
iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
- dma_free_iommu(zdev, iommu_page_index, npages);
+ dma_free_iommu(dev, iommu_page_index, npages);
}
static void *s390_dma_alloc(struct device *dev, size_t size,
@@ -338,8 +369,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
pa = page_to_phys(page);
memset((void *) pa, 0, size);
- map = s390_dma_map_pages(dev, page, pa % PAGE_SIZE,
- size, DMA_BIDIRECTIONAL, NULL);
+ map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, NULL);
if (dma_mapping_error(dev, map)) {
free_pages(pa, get_order(size));
return NULL;
@@ -430,7 +460,19 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
goto out_clean;
}
- zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET;
+ /*
+ * Restrict the iommu bitmap size to the minimum of the following:
+ * - main memory size
+ * - 3-level pagetable address limit minus start_dma offset
+ * - DMA address range allowed by the hardware (clp query pci fn)
+ *
+ * Also set zdev->end_dma to the actual end address of the usable
+ * range, instead of the theoretical maximum as reported by hardware.
+ */
+ zdev->iommu_size = min3((u64) high_memory,
+ ZPCI_TABLE_SIZE_RT - zdev->start_dma,
+ zdev->end_dma - zdev->start_dma + 1);
+ zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
if (!zdev->iommu_bitmap) {
@@ -438,10 +480,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
goto out_reg;
}
- rc = zpci_register_ioat(zdev,
- 0,
- zdev->start_dma + PAGE_OFFSET,
- zdev->start_dma + zdev->iommu_size - 1,
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
(u64) zdev->dma_table);
if (rc)
goto out_reg;
@@ -508,7 +547,7 @@ static int __init dma_debug_do_init(void)
}
fs_initcall(dma_debug_do_init);
-struct dma_map_ops s390_dma_ops = {
+struct dma_map_ops s390_pci_dma_ops = {
.alloc = s390_dma_alloc,
.free = s390_dma_free,
.map_sg = s390_dma_map_sg,
@@ -519,7 +558,7 @@ struct dma_map_ops s390_dma_ops = {
.is_phys = 0,
/* dma_supported is unconditionally true without a callback */
};
-EXPORT_SYMBOL_GPL(s390_dma_ops);
+EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
static int __init s390_iommu_setup(char *str)
{
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 369a3e05d468..fb2a9a560fdc 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -46,13 +46,22 @@ struct zpci_ccdf_avail {
static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
- struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
+ struct pci_dev *pdev = NULL;
zpci_err("error CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
+ if (zdev)
+ pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+
pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+
+ if (!pdev)
+ return;
+
+ pdev->error_state = pci_channel_io_perm_failure;
+ pci_dev_put(pdev);
}
void zpci_event_error(void *data)
@@ -64,9 +73,12 @@ void zpci_event_error(void *data)
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
- struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
+ struct pci_dev *pdev = NULL;
int ret;
+ if (zdev)
+ pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+
pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
zpci_err("avail CCDF:\n");
@@ -133,6 +145,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
default:
break;
}
+ if (pdev)
+ pci_dev_put(pdev);
}
void zpci_event_availability(void *data)
diff --git a/arch/s390/tools/.gitignore b/arch/s390/tools/.gitignore
new file mode 100644
index 000000000000..72a4b2cf1365
--- /dev/null
+++ b/arch/s390/tools/.gitignore
@@ -0,0 +1 @@
+gen_facilities
diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile
new file mode 100644
index 000000000000..6d9814c9df2b
--- /dev/null
+++ b/arch/s390/tools/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for s390 specific build tools
+#
+
+hostprogs-y += gen_facilities
+HOSTCFLAGS_gen_facilities.o += -Wall $(LINUXINCLUDE)
+
+define filechk_facilities.h
+ $(obj)/gen_facilities
+endef
+
+$(obj)/gen_facilities.o: $(srctree)/arch/s390/tools/gen_facilities.c
+
+include/generated/facilities.h: $(obj)/gen_facilities FORCE
+ $(call filechk,facilities.h)
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
new file mode 100644
index 000000000000..e2660d27889b
--- /dev/null
+++ b/arch/s390/tools/gen_facilities.c
@@ -0,0 +1,67 @@
+/*
+ * Simple program to generate defines out of facility lists that use the bit
+ * numbering scheme from the Princples of Operations: most significant bit
+ * has bit number 0.
+ *
+ * Copyright IBM Corp. 2015
+ *
+ */
+
+#define S390_GEN_FACILITIES_C
+
+#include <strings.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <asm/facilities_src.h>
+
+static void print_facility_list(struct facility_def *def)
+{
+ unsigned int high, bit, dword, i;
+ unsigned long long *array;
+
+ array = calloc(1, 8);
+ if (!array)
+ exit(EXIT_FAILURE);
+ high = 0;
+ for (i = 0; def->bits[i] != -1; i++) {
+ bit = 63 - (def->bits[i] & 63);
+ dword = def->bits[i] / 64;
+ if (dword > high) {
+ array = realloc(array, (dword + 1) * 8);
+ if (!array)
+ exit(EXIT_FAILURE);
+ memset(array + high + 1, 0, (dword - high) * 8);
+ high = dword;
+ }
+ array[dword] |= 1ULL << bit;
+ }
+ printf("#define %s ", def->name);
+ for (i = 0; i <= high; i++)
+ printf("_AC(0x%016llx,UL)%c", array[i], i < high ? ',' : '\n');
+ printf("#define %s_DWORDS %d\n", def->name, high + 1);
+ free(array);
+}
+
+static void print_facility_lists(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < sizeof(facility_defs) / sizeof(facility_defs[0]); i++)
+ print_facility_list(&facility_defs[i]);
+}
+
+int main(int argc, char **argv)
+{
+ printf("#ifndef __ASM_S390_FACILITIES__\n");
+ printf("#define __ASM_S390_FACILITIES__\n");
+ printf("/*\n");
+ printf(" * DO NOT MODIFY.\n");
+ printf(" *\n");
+ printf(" * This file was generated by %s\n", __FILE__);
+ printf(" */\n\n");
+ printf("#include <linux/const.h>\n\n");
+ print_facility_lists();
+ printf("\n#endif\n");
+ return 0;
+}