summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-05 18:45:46 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-05 18:45:46 +0200
commit9e85ae6af6e907975f68d82ff127073ec024cb05 (patch)
tree3d3349b03da858e53ef8f8dce467e4a691eabf88 /arch
parentMerge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mat... (diff)
parents390/mm: avoid empty zero pages for KVM guests to avoid postcopy hangs (diff)
downloadlinux-9e85ae6af6e907975f68d82ff127073ec024cb05.tar.xz
linux-9e85ae6af6e907975f68d82ff127073ec024cb05.zip
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Martin Schwidefsky: "The first part of the s390 updates for 4.14: - Add machine type 0x3906 for IBM z14 - Add IBM z14 TLB flushing improvements for KVM guests - Exploit the TOD clock epoch extension to provide a continuous TOD clock afer 2042/09/17 - Add NIAI spinlock hints for IBM z14 - Rework the vmcp driver and use CMA for the respone buffer of z/VM CP commands - Drop some s390 specific asm headers and use the generic version - Add block discard for DASD-FBA devices under z/VM - Add average request times to DASD statistics - A few of those constify patches which seem to be in vogue right now - Cleanup and bug fixes" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (50 commits) s390/mm: avoid empty zero pages for KVM guests to avoid postcopy hangs s390/dasd: Add discard support for FBA devices s390/zcrypt: make CPRBX const s390/uaccess: avoid mvcos jump label s390/mm: use generic mm_hooks s390/facilities: fix typo s390/vmcp: simplify vmcp_response_free() s390/topology: Remove the unused parent_node() macro s390/dasd: Change unsigned long long to unsigned long s390/smp: convert cpuhp_setup_state() return code to zero on success s390: fix 'novx' early parameter handling s390/dasd: add average request times to dasd statistics s390/scm: use common completion path s390/pci: log changes to uid checking s390/vmcp: simplify vmcp_ioctl() s390/vmcp: return -ENOTTY for unknown ioctl commands s390/vmcp: split vmcp header file and move to uapi s390/vmcp: make use of contiguous memory allocator s390/cpcmd,vmcp: avoid GFP_DMA allocations s390/vmcp: fix uaccess check and avoid undefined behavior ...
Diffstat (limited to 'arch')
-rw-r--r--arch/s390/Kconfig18
-rw-r--r--arch/s390/Makefile6
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/cpcmd.h7
-rw-r--r--arch/s390/include/asm/ebcdic.h4
-rw-r--r--arch/s390/include/asm/elf.h2
-rw-r--r--arch/s390/include/asm/ipl.h2
-rw-r--r--arch/s390/include/asm/lowcore.h48
-rw-r--r--arch/s390/include/asm/mman.h11
-rw-r--r--arch/s390/include/asm/mmu_context.h33
-rw-r--r--arch/s390/include/asm/nmi.h2
-rw-r--r--arch/s390/include/asm/page-states.h1
-rw-r--r--arch/s390/include/asm/page.h37
-rw-r--r--arch/s390/include/asm/pgalloc.h18
-rw-r--r--arch/s390/include/asm/pgtable.h197
-rw-r--r--arch/s390/include/asm/qdio.h2
-rw-r--r--arch/s390/include/asm/setup.h17
-rw-r--r--arch/s390/include/asm/spinlock.h9
-rw-r--r--arch/s390/include/asm/timex.h40
-rw-r--r--arch/s390/include/asm/tlb.h6
-rw-r--r--arch/s390/include/asm/tlbflush.h7
-rw-r--r--arch/s390/include/asm/topology.h6
-rw-r--r--arch/s390/include/asm/types.h11
-rw-r--r--arch/s390/include/asm/unaligned.h13
-rw-r--r--arch/s390/include/uapi/asm/Kbuild1
-rw-r--r--arch/s390/include/uapi/asm/dasd.h6
-rw-r--r--arch/s390/include/uapi/asm/swab.h89
-rw-r--r--arch/s390/include/uapi/asm/vmcp.h24
-rw-r--r--arch/s390/kernel/asm-offsets.c1
-rw-r--r--arch/s390/kernel/cpcmd.c13
-rw-r--r--arch/s390/kernel/debug.c9
-rw-r--r--arch/s390/kernel/dumpstack.c2
-rw-r--r--arch/s390/kernel/early.c17
-rw-r--r--arch/s390/kernel/head.S3
-rw-r--r--arch/s390/kernel/head64.S4
-rw-r--r--arch/s390/kernel/irq.c3
-rw-r--r--arch/s390/kernel/relocate_kernel.S5
-rw-r--r--arch/s390/kernel/setup.c14
-rw-r--r--arch/s390/kernel/smp.c1
-rw-r--r--arch/s390/kernel/suspend.c24
-rw-r--r--arch/s390/kernel/time.c67
-rw-r--r--arch/s390/kernel/vdso.c2
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S4
-rw-r--r--arch/s390/kernel/vdso64/vdso64.lds.S4
-rw-r--r--arch/s390/kvm/diag.c8
-rw-r--r--arch/s390/kvm/gaccess.c35
-rw-r--r--arch/s390/kvm/priv.c8
-rw-r--r--arch/s390/kvm/vsie.c2
-rw-r--r--arch/s390/lib/delay.c2
-rw-r--r--arch/s390/lib/spinlock.c87
-rw-r--r--arch/s390/lib/uaccess.c38
-rw-r--r--arch/s390/mm/fault.c10
-rw-r--r--arch/s390/mm/gmap.c163
-rw-r--r--arch/s390/mm/init.c60
-rw-r--r--arch/s390/mm/page-states.c192
-rw-r--r--arch/s390/mm/pageattr.c5
-rw-r--r--arch/s390/mm/pgalloc.c12
-rw-r--r--arch/s390/mm/pgtable.c154
-rw-r--r--arch/s390/mm/vmem.c47
-rw-r--r--arch/s390/pci/pci_clp.c10
-rw-r--r--arch/s390/tools/gen_facilities.c5
61 files changed, 1020 insertions, 609 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 7eeb75d758c1..48af970320cb 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -222,6 +222,10 @@ config HAVE_MARCH_Z13_FEATURES
def_bool n
select HAVE_MARCH_ZEC12_FEATURES
+config HAVE_MARCH_Z14_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z13_FEATURES
+
choice
prompt "Processor type"
default MARCH_Z196
@@ -282,6 +286,14 @@ config MARCH_Z13
2964 series). The kernel will be slightly faster but will not work on
older machines.
+config MARCH_Z14
+ bool "IBM z14"
+ select HAVE_MARCH_Z14_FEATURES
+ help
+ Select this to enable optimizations for IBM z14 (3906 series).
+ The kernel will be slightly faster but will not work on older
+ machines.
+
endchoice
config MARCH_Z900_TUNE
@@ -305,6 +317,9 @@ config MARCH_ZEC12_TUNE
config MARCH_Z13_TUNE
def_bool TUNE_Z13 || MARCH_Z13 && TUNE_DEFAULT
+config MARCH_Z14_TUNE
+ def_bool TUNE_Z14 || MARCH_Z14 && TUNE_DEFAULT
+
choice
prompt "Tune code generation"
default TUNE_DEFAULT
@@ -343,6 +358,9 @@ config TUNE_ZEC12
config TUNE_Z13
bool "IBM z13"
+config TUNE_Z14
+ bool "IBM z14"
+
endchoice
config 64BIT
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 54e00526b8df..dac821cfcd43 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -31,7 +31,8 @@ mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109
mflags-$(CONFIG_MARCH_Z10) := -march=z10
mflags-$(CONFIG_MARCH_Z196) := -march=z196
mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12
-mflags-$(CONFIG_MARCH_Z13) := -march=z13
+mflags-$(CONFIG_MARCH_Z13) := -march=z13
+mflags-$(CONFIG_MARCH_Z14) := -march=z14
export CC_FLAGS_MARCH := $(mflags-y)
@@ -44,7 +45,8 @@ cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109
cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10
cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196
cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12
-cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13
+cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13
+cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14
cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index b3c88479feba..6e2c9f7e47fa 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -16,4 +16,5 @@ generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += preempt.h
generic-y += trace_clock.h
+generic-y += unaligned.h
generic-y += word-at-a-time.h
diff --git a/arch/s390/include/asm/cpcmd.h b/arch/s390/include/asm/cpcmd.h
index 3dfadb5d648f..ca2b0624ad46 100644
--- a/arch/s390/include/asm/cpcmd.h
+++ b/arch/s390/include/asm/cpcmd.h
@@ -10,9 +10,8 @@
/*
* the lowlevel function for cpcmd
- * the caller of __cpcmd has to ensure that the response buffer is below 2 GB
*/
-extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
/*
* cpcmd is the in-kernel interface for issuing CP commands
@@ -25,8 +24,8 @@ extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code
* response_code: return pointer for VM's error code
* return value: the size of the response. The caller can check if the buffer
* was large enough by comparing the return value and rlen
- * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep
+ * NOTE: If the response buffer is not in real storage, cpcmd can sleep
*/
-extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
#endif /* _ASM_S390_CPCMD_H */
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
index c5befc5a3bf5..b71735eab23f 100644
--- a/arch/s390/include/asm/ebcdic.h
+++ b/arch/s390/include/asm/ebcdic.h
@@ -9,9 +9,7 @@
#ifndef _EBCDIC_H
#define _EBCDIC_H
-#ifndef _S390_TYPES_H
-#include <types.h>
-#endif
+#include <linux/types.h>
extern __u8 _ascebc_500[256]; /* ASCII -> EBCDIC 500 conversion table */
extern __u8 _ebcasc_500[256]; /* EBCDIC 500 -> ASCII conversion table */
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index c92ed0170be2..65998a1f5d43 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -191,7 +191,7 @@ struct arch_elf_state {
} while (0)
#define CORE_DUMP_USE_REGSET
-#define ELF_EXEC_PAGESIZE 4096
+#define ELF_EXEC_PAGESIZE PAGE_SIZE
/*
* This is the base location for PIE (ET_DYN with INTERP) loads. On
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index edb5161df7e2..6810bd757312 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -81,7 +81,7 @@ struct ipl_parameter_block {
struct ipl_block_fcp fcp;
struct ipl_block_ccw ccw;
} ipl_info;
-} __attribute__((packed,aligned(4096)));
+} __packed __aligned(PAGE_SIZE);
/*
* IPL validity flags
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 8a5b082797f8..a6870ea6ea8b 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -95,46 +95,46 @@ struct lowcore {
__u64 int_clock; /* 0x0310 */
__u64 mcck_clock; /* 0x0318 */
__u64 clock_comparator; /* 0x0320 */
+ __u64 boot_clock[2]; /* 0x0328 */
/* Current process. */
- __u64 current_task; /* 0x0328 */
- __u8 pad_0x318[0x320-0x318]; /* 0x0330 */
- __u64 kernel_stack; /* 0x0338 */
+ __u64 current_task; /* 0x0338 */
+ __u64 kernel_stack; /* 0x0340 */
/* Interrupt, panic and restart stack. */
- __u64 async_stack; /* 0x0340 */
- __u64 panic_stack; /* 0x0348 */
- __u64 restart_stack; /* 0x0350 */
+ __u64 async_stack; /* 0x0348 */
+ __u64 panic_stack; /* 0x0350 */
+ __u64 restart_stack; /* 0x0358 */
/* Restart function and parameter. */
- __u64 restart_fn; /* 0x0358 */
- __u64 restart_data; /* 0x0360 */
- __u64 restart_source; /* 0x0368 */
+ __u64 restart_fn; /* 0x0360 */
+ __u64 restart_data; /* 0x0368 */
+ __u64 restart_source; /* 0x0370 */
/* Address space pointer. */
- __u64 kernel_asce; /* 0x0370 */
- __u64 user_asce; /* 0x0378 */
+ __u64 kernel_asce; /* 0x0378 */
+ __u64 user_asce; /* 0x0380 */
/*
* The lpp and current_pid fields form a
* 64-bit value that is set as program
* parameter with the LPP instruction.
*/
- __u32 lpp; /* 0x0380 */
- __u32 current_pid; /* 0x0384 */
+ __u32 lpp; /* 0x0388 */
+ __u32 current_pid; /* 0x038c */
/* SMP info area */
- __u32 cpu_nr; /* 0x0388 */
- __u32 softirq_pending; /* 0x038c */
- __u64 percpu_offset; /* 0x0390 */
- __u64 vdso_per_cpu_data; /* 0x0398 */
- __u64 machine_flags; /* 0x03a0 */
- __u32 preempt_count; /* 0x03a8 */
- __u8 pad_0x03ac[0x03b0-0x03ac]; /* 0x03ac */
- __u64 gmap; /* 0x03b0 */
- __u32 spinlock_lockval; /* 0x03b8 */
- __u32 fpu_flags; /* 0x03bc */
- __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */
+ __u32 cpu_nr; /* 0x0390 */
+ __u32 softirq_pending; /* 0x0394 */
+ __u64 percpu_offset; /* 0x0398 */
+ __u64 vdso_per_cpu_data; /* 0x03a0 */
+ __u64 machine_flags; /* 0x03a8 */
+ __u32 preempt_count; /* 0x03b0 */
+ __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */
+ __u64 gmap; /* 0x03b8 */
+ __u32 spinlock_lockval; /* 0x03c0 */
+ __u32 fpu_flags; /* 0x03c4 */
+ __u8 pad_0x03c8[0x0400-0x03c8]; /* 0x03c8 */
/* Per cpu primary space access list */
__u32 paste[16]; /* 0x0400 */
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
deleted file mode 100644
index b79813d9cf68..000000000000
--- a/arch/s390/include/asm/mman.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/mman.h"
- */
-#ifndef __S390_MMAN_H__
-#define __S390_MMAN_H__
-
-#include <uapi/asm/mman.h>
-
-#endif /* __S390_MMAN_H__ */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 24bc41622a98..72e9ca83a668 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -12,6 +12,7 @@
#include <linux/mm_types.h>
#include <asm/tlbflush.h>
#include <asm/ctl_reg.h>
+#include <asm-generic/mm_hooks.h>
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
@@ -33,7 +34,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.use_cmma = 0;
#endif
switch (mm->context.asce_limit) {
- case 1UL << 42:
+ case _REGION2_SIZE:
/*
* forked 3-level task, fall through to set new asce with new
* mm->pgd
@@ -49,12 +50,12 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
break;
- case 1UL << 53:
+ case _REGION1_SIZE:
/* forked 4-level task, set new asce with new mm->pgd */
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
break;
- case 1UL << 31:
+ case _REGION3_SIZE:
/* forked 2-level compat task, set new asce with new mm->pgd */
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
@@ -138,30 +139,4 @@ static inline void activate_mm(struct mm_struct *prev,
set_user_asce(next);
}
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
- struct mm_struct *mm)
-{
-}
-
-static inline void arch_exit_mmap(struct mm_struct *mm)
-{
-}
-
-static inline void arch_unmap(struct mm_struct *mm,
- struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
-}
-
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
-}
-
-static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
- bool write, bool execute, bool foreign)
-{
- /* by default, allow everything */
- return true;
-}
#endif /* __S390_MMU_CONTEXT_H */
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 9d91cf3e427f..c8e211b9a002 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -72,7 +72,7 @@ union mci {
u64 ar : 1; /* 33 access register validity */
u64 da : 1; /* 34 delayed access exception */
u64 : 1; /* 35 */
- u64 gs : 1; /* 36 guarded storage registers */
+ u64 gs : 1; /* 36 guarded storage registers validity */
u64 : 5; /* 37-41 */
u64 pr : 1; /* 42 tod programmable register validity */
u64 fc : 1; /* 43 fp control register validity */
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
index 42267a2fe29e..ca21b28a7b17 100644
--- a/arch/s390/include/asm/page-states.h
+++ b/arch/s390/include/asm/page-states.h
@@ -13,6 +13,7 @@
#define ESSA_SET_POT_VOLATILE 4
#define ESSA_SET_STABLE_RESIDENT 5
#define ESSA_SET_STABLE_IF_RESIDENT 6
+#define ESSA_SET_STABLE_NODAT 7
#define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 624deaa44230..5d5c2b3500a4 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -10,10 +10,14 @@
#include <linux/const.h>
#include <asm/types.h>
+#define _PAGE_SHIFT 12
+#define _PAGE_SIZE (_AC(1, UL) << _PAGE_SHIFT)
+#define _PAGE_MASK (~(_PAGE_SIZE - 1))
+
/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#define PAGE_SHIFT _PAGE_SHIFT
+#define PAGE_SIZE _PAGE_SIZE
+#define PAGE_MASK _PAGE_MASK
#define PAGE_DEFAULT_ACC 0
#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
@@ -133,6 +137,9 @@ static inline int page_reset_referenced(unsigned long addr)
struct page;
void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order);
+void arch_set_page_dat(struct page *page, int order);
+void arch_set_page_nodat(struct page *page, int order);
+int arch_test_page_nodat(struct page *page);
void arch_set_page_states(int make_stable);
static inline int devmem_is_allowed(unsigned long pfn)
@@ -145,16 +152,26 @@ static inline int devmem_is_allowed(unsigned long pfn)
#endif /* !__ASSEMBLY__ */
-#define __PAGE_OFFSET 0x0UL
-#define PAGE_OFFSET 0x0UL
-#define __pa(x) (unsigned long)(x)
-#define __va(x) (void *)(unsigned long)(x)
-#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
-#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define __PAGE_OFFSET 0x0UL
+#define PAGE_OFFSET 0x0UL
+
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)(unsigned long)(x))
+
+#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT)
+
+#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define page_to_virt(page) pfn_to_virt(page_to_pfn(page))
+#define phys_to_pfn(kaddr) ((kaddr) >> PAGE_SHIFT)
+#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
+
+#define phys_to_page(kaddr) pfn_to_page(phys_to_pfn(kaddr))
+#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
+
+#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index bb0ff1bb0c4a..a0d9167519b1 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -15,6 +15,8 @@
#include <linux/gfp.h>
#include <linux/mm.h>
+#define CRST_ALLOC_ORDER 2
+
unsigned long *crst_table_alloc(struct mm_struct *);
void crst_table_free(struct mm_struct *, unsigned long *);
@@ -42,16 +44,16 @@ static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
static inline void crst_table_init(unsigned long *crst, unsigned long entry)
{
- clear_table(crst, entry, sizeof(unsigned long)*2048);
+ clear_table(crst, entry, _CRST_TABLE_SIZE);
}
static inline unsigned long pgd_entry_type(struct mm_struct *mm)
{
- if (mm->context.asce_limit <= (1UL << 31))
+ if (mm->context.asce_limit <= _REGION3_SIZE)
return _SEGMENT_ENTRY_EMPTY;
- if (mm->context.asce_limit <= (1UL << 42))
+ if (mm->context.asce_limit <= _REGION2_SIZE)
return _REGION3_ENTRY_EMPTY;
- if (mm->context.asce_limit <= (1UL << 53))
+ if (mm->context.asce_limit <= _REGION1_SIZE)
return _REGION2_ENTRY_EMPTY;
return _REGION1_ENTRY_EMPTY;
}
@@ -119,7 +121,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
if (!table)
return NULL;
- if (mm->context.asce_limit == (1UL << 31)) {
+ if (mm->context.asce_limit == _REGION3_SIZE) {
/* Forking a compat process with 2 page table levels */
if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
crst_table_free(mm, table);
@@ -131,7 +133,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- if (mm->context.asce_limit == (1UL << 31))
+ if (mm->context.asce_limit == _REGION3_SIZE)
pgtable_pmd_page_dtor(virt_to_page(pgd));
crst_table_free(mm, (unsigned long *) pgd);
}
@@ -158,4 +160,8 @@ static inline void pmd_populate(struct mm_struct *mm,
extern void rcu_table_freelist_finish(void);
+void vmem_map_init(void);
+void *vmem_crst_alloc(unsigned long val);
+pte_t *vmem_pte_alloc(void);
+
#endif /* _S390_PGALLOC_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 57057fb1cc07..dce708e061ea 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -11,19 +11,6 @@
#ifndef _ASM_S390_PGTABLE_H
#define _ASM_S390_PGTABLE_H
-/*
- * The Linux memory management assumes a three-level page table setup.
- * For s390 64 bit we use up to four of the five levels the hardware
- * provides (region first tables are not used).
- *
- * The "pgd_xxx()" functions are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- *
- * This file contains the functions and defines necessary to modify and use
- * the S390 page table tree.
- */
-#ifndef __ASSEMBLY__
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
@@ -34,9 +21,6 @@
extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
-extern void vmem_map_init(void);
-pmd_t *vmem_pmd_alloc(void);
-pte_t *vmem_pte_alloc(void);
enum {
PG_DIRECT_MAP_4K = 0,
@@ -77,38 +61,6 @@ extern unsigned long zero_page_mask;
#define __HAVE_COLOR_ZERO_PAGE
/* TODO: s390 cannot support io_remap_pfn_range... */
-#endif /* !__ASSEMBLY__ */
-
-/*
- * PMD_SHIFT determines the size of the area a second-level page
- * table can map
- * PGDIR_SHIFT determines what a third-level page table entry can map
- */
-#define PMD_SHIFT 20
-#define PUD_SHIFT 31
-#define P4D_SHIFT 42
-#define PGDIR_SHIFT 53
-
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-#define PUD_SIZE (1UL << PUD_SHIFT)
-#define PUD_MASK (~(PUD_SIZE-1))
-#define P4D_SIZE (1UL << P4D_SHIFT)
-#define P4D_MASK (~(P4D_SIZE-1))
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-
-/*
- * entries per page directory level: the S390 is two-level, so
- * we don't really have any PMD directory physically.
- * for S390 segment-table entries are combined to one PGD
- * that leads to 1024 pte per pgd
- */
-#define PTRS_PER_PTE 256
-#define PTRS_PER_PMD 2048
-#define PTRS_PER_PUD 2048
-#define PTRS_PER_P4D 2048
-#define PTRS_PER_PGD 2048
#define FIRST_USER_ADDRESS 0UL
@@ -123,7 +75,6 @@ extern unsigned long zero_page_mask;
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
-#ifndef __ASSEMBLY__
/*
* The vmalloc and module area will always be on the topmost area of the
* kernel mapping. We reserve 128GB (64bit) for vmalloc and modules.
@@ -269,7 +220,7 @@ static inline int is_module_addr(void *addr)
*/
/* Bits in the segment/region table address-space-control-element */
-#define _ASCE_ORIGIN ~0xfffUL/* segment table origin */
+#define _ASCE_ORIGIN ~0xfffUL/* region/segment table origin */
#define _ASCE_PRIVATE_SPACE 0x100 /* private space control */
#define _ASCE_ALT_EVENT 0x80 /* storage alteration event control */
#define _ASCE_SPACE_SWITCH 0x40 /* space switch event */
@@ -320,9 +271,9 @@ static inline int is_module_addr(void *addr)
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
-#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */
-#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
-#define _SEGMENT_ENTRY_NOEXEC 0x100 /* region no-execute bit */
+#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */
+#define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */
+#define _SEGMENT_ENTRY_NOEXEC 0x100 /* segment no-execute bit */
#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY (0)
@@ -340,6 +291,54 @@ static inline int is_module_addr(void *addr)
#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
#endif
+#define _CRST_ENTRIES 2048 /* number of region/segment table entries */
+#define _PAGE_ENTRIES 256 /* number of page table entries */
+
+#define _CRST_TABLE_SIZE (_CRST_ENTRIES * 8)
+#define _PAGE_TABLE_SIZE (_PAGE_ENTRIES * 8)
+
+#define _REGION1_SHIFT 53
+#define _REGION2_SHIFT 42
+#define _REGION3_SHIFT 31
+#define _SEGMENT_SHIFT 20
+
+#define _REGION1_INDEX (0x7ffUL << _REGION1_SHIFT)
+#define _REGION2_INDEX (0x7ffUL << _REGION2_SHIFT)
+#define _REGION3_INDEX (0x7ffUL << _REGION3_SHIFT)
+#define _SEGMENT_INDEX (0x7ffUL << _SEGMENT_SHIFT)
+#define _PAGE_INDEX (0xffUL << _PAGE_SHIFT)
+
+#define _REGION1_SIZE (1UL << _REGION1_SHIFT)
+#define _REGION2_SIZE (1UL << _REGION2_SHIFT)
+#define _REGION3_SIZE (1UL << _REGION3_SHIFT)
+#define _SEGMENT_SIZE (1UL << _SEGMENT_SHIFT)
+
+#define _REGION1_MASK (~(_REGION1_SIZE - 1))
+#define _REGION2_MASK (~(_REGION2_SIZE - 1))
+#define _REGION3_MASK (~(_REGION3_SIZE - 1))
+#define _SEGMENT_MASK (~(_SEGMENT_SIZE - 1))
+
+#define PMD_SHIFT _SEGMENT_SHIFT
+#define PUD_SHIFT _REGION3_SHIFT
+#define P4D_SHIFT _REGION2_SHIFT
+#define PGDIR_SHIFT _REGION1_SHIFT
+
+#define PMD_SIZE _SEGMENT_SIZE
+#define PUD_SIZE _REGION3_SIZE
+#define P4D_SIZE _REGION2_SIZE
+#define PGDIR_SIZE _REGION1_SIZE
+
+#define PMD_MASK _SEGMENT_MASK
+#define PUD_MASK _REGION3_MASK
+#define P4D_MASK _REGION2_MASK
+#define PGDIR_MASK _REGION1_MASK
+
+#define PTRS_PER_PTE _PAGE_ENTRIES
+#define PTRS_PER_PMD _CRST_ENTRIES
+#define PTRS_PER_PUD _CRST_ENTRIES
+#define PTRS_PER_P4D _CRST_ENTRIES
+#define PTRS_PER_PGD _CRST_ENTRIES
+
/*
* Segment table and region3 table entry encoding
* (R = read-only, I = invalid, y = young bit):
@@ -376,6 +375,7 @@ static inline int is_module_addr(void *addr)
/* Guest Page State used for virtualization */
#define _PGSTE_GPS_ZERO 0x0000000080000000UL
+#define _PGSTE_GPS_NODAT 0x0000000040000000UL
#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL
#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
@@ -505,7 +505,7 @@ static inline int mm_alloc_pgste(struct mm_struct *mm)
* In the case that a guest uses storage keys
* faults should no longer be backed by zero pages
*/
-#define mm_forbids_zeropage mm_use_skey
+#define mm_forbids_zeropage mm_has_pgste
static inline int mm_use_skey(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
@@ -952,15 +952,30 @@ static inline pte_t pte_mkhuge(pte_t pte)
#define IPTE_GLOBAL 0
#define IPTE_LOCAL 1
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local)
+#define IPTE_NODAT 0x400
+#define IPTE_GUEST_ASCE 0x800
+
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep,
+ unsigned long opt, unsigned long asce,
+ int local)
{
unsigned long pto = (unsigned long) ptep;
- /* Invalidation + TLB flush for the pte */
+ if (__builtin_constant_p(opt) && opt == 0) {
+ /* Invalidation + TLB flush for the pte */
+ asm volatile(
+ " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
+ : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
+ [m4] "i" (local));
+ return;
+ }
+
+ /* Invalidate ptes with options + TLB flush of the ptes */
+ opt = opt | (asce & _ASCE_ORIGIN);
asm volatile(
- " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
- : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
- [m4] "i" (local));
+ " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
+ : [r2] "+a" (address), [r3] "+a" (opt)
+ : [r1] "a" (pto), [m4] "i" (local) : "memory");
}
static inline void __ptep_ipte_range(unsigned long address, int nr,
@@ -1341,31 +1356,61 @@ static inline void __pmdp_csp(pmd_t *pmdp)
#define IDTE_GLOBAL 0
#define IDTE_LOCAL 1
-static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local)
+#define IDTE_PTOA 0x0800
+#define IDTE_NODAT 0x1000
+#define IDTE_GUEST_ASCE 0x2000
+
+static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
+ unsigned long opt, unsigned long asce,
+ int local)
{
unsigned long sto;
- sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
- asm volatile(
- " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
- : "+m" (*pmdp)
- : [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)),
- [m4] "i" (local)
- : "cc" );
+ sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t);
+ if (__builtin_constant_p(opt) && opt == 0) {
+ /* flush without guest asce */
+ asm volatile(
+ " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+ : "+m" (*pmdp)
+ : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)),
+ [m4] "i" (local)
+ : "cc" );
+ } else {
+ /* flush with guest asce */
+ asm volatile(
+ " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]"
+ : "+m" (*pmdp)
+ : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt),
+ [r3] "a" (asce), [m4] "i" (local)
+ : "cc" );
+ }
}
-static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local)
+static inline void __pudp_idte(unsigned long addr, pud_t *pudp,
+ unsigned long opt, unsigned long asce,
+ int local)
{
unsigned long r3o;
- r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
+ r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t);
r3o |= _ASCE_TYPE_REGION3;
- asm volatile(
- " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
- : "+m" (*pudp)
- : [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)),
- [m4] "i" (local)
- : "cc");
+ if (__builtin_constant_p(opt) && opt == 0) {
+ /* flush without guest asce */
+ asm volatile(
+ " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+ : "+m" (*pudp)
+ : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK)),
+ [m4] "i" (local)
+ : "cc");
+ } else {
+ /* flush with guest asce */
+ asm volatile(
+ " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]"
+ : "+m" (*pudp)
+ : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt),
+ [r3] "a" (asce), [m4] "i" (local)
+ : "cc" );
+ }
}
pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
@@ -1548,8 +1593,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#endif /* !__ASSEMBLY__ */
-
#define kern_addr_valid(addr) (1)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 998b61cd0e56..eaee69e7c42a 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -80,7 +80,7 @@ struct qdr {
u32 qkey : 4;
u32 : 28;
struct qdesfmt0 qdf0[126];
-} __attribute__ ((packed, aligned(4096)));
+} __packed __aligned(PAGE_SIZE);
#define QIB_AC_OUTBOUND_PCI_SUPPORTED 0x40
#define QIB_RFLAGS_ENABLE_QEBSM 0x80
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index cd78155b1829..490e035b3716 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -29,8 +29,10 @@
#define MACHINE_FLAG_TE _BITUL(11)
#define MACHINE_FLAG_TLB_LC _BITUL(12)
#define MACHINE_FLAG_VX _BITUL(13)
-#define MACHINE_FLAG_NX _BITUL(14)
-#define MACHINE_FLAG_GS _BITUL(15)
+#define MACHINE_FLAG_TLB_GUEST _BITUL(14)
+#define MACHINE_FLAG_NX _BITUL(15)
+#define MACHINE_FLAG_GS _BITUL(16)
+#define MACHINE_FLAG_SCC _BITUL(17)
#define LPP_MAGIC _BITUL(31)
#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL)
@@ -68,8 +70,10 @@ extern void detect_memory_memblock(void);
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
+#define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
+#define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
/*
* Console mode. Override with conmode=
@@ -104,9 +108,16 @@ extern void pfault_fini(void);
#define pfault_fini() do { } while (0)
#endif /* CONFIG_PFAULT */
+#ifdef CONFIG_VMCP
+void vmcp_cma_reserve(void);
+#else
+static inline void vmcp_cma_reserve(void) { }
+#endif
+
void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
-extern void cmma_init(void);
+void cmma_init(void);
+void cmma_init_nodat(void);
extern void (*_machine_restart)(char *command);
extern void (*_machine_halt)(void);
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 217ee5210c32..8182b521c42f 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -92,10 +92,11 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
{
typecheck(int, lp->lock);
asm volatile(
- "st %1,%0\n"
- : "+Q" (lp->lock)
- : "d" (0)
- : "cc", "memory");
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ " .long 0xb2fa0070\n" /* NIAI 7 */
+#endif
+ " st %1,%0\n"
+ : "=Q" (lp->lock) : "d" (0) : "cc", "memory");
}
/*
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 118535123f34..93f2eb3f277c 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -15,6 +15,8 @@
/* The value of the TOD clock for 1.1.1970. */
#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
+extern u64 clock_comparator_max;
+
/* Inline functions for clock register access. */
static inline int set_tod_clock(__u64 time)
{
@@ -126,7 +128,7 @@ static inline unsigned long long local_tick_disable(void)
unsigned long long old;
old = S390_lowcore.clock_comparator;
- S390_lowcore.clock_comparator = -1ULL;
+ S390_lowcore.clock_comparator = clock_comparator_max;
set_clock_comparator(S390_lowcore.clock_comparator);
return old;
}
@@ -174,24 +176,24 @@ static inline cycles_t get_cycles(void)
return (cycles_t) get_tod_clock() >> 2;
}
-int get_phys_clock(unsigned long long *clock);
+int get_phys_clock(unsigned long *clock);
void init_cpu_timer(void);
unsigned long long monotonic_clock(void);
-extern u64 sched_clock_base_cc;
+extern unsigned char tod_clock_base[16] __aligned(8);
/**
* get_clock_monotonic - returns current time in clock rate units
*
* The caller must ensure that preemption is disabled.
- * The clock and sched_clock_base get changed via stop_machine.
+ * The clock and tod_clock_base get changed via stop_machine.
* Therefore preemption must be disabled when calling this
* function, otherwise the returned value is not guaranteed to
* be monotonic.
*/
static inline unsigned long long get_tod_clock_monotonic(void)
{
- return get_tod_clock() - sched_clock_base_cc;
+ return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1];
}
/**
@@ -218,4 +220,32 @@ static inline unsigned long long tod_to_ns(unsigned long long todval)
return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
}
+/**
+ * tod_after - compare two 64 bit TOD values
+ * @a: first 64 bit TOD timestamp
+ * @b: second 64 bit TOD timestamp
+ *
+ * Returns: true if a is later than b
+ */
+static inline int tod_after(unsigned long long a, unsigned long long b)
+{
+ if (MACHINE_HAS_SCC)
+ return (long long) a > (long long) b;
+ return a > b;
+}
+
+/**
+ * tod_after_eq - compare two 64 bit TOD values
+ * @a: first 64 bit TOD timestamp
+ * @b: second 64 bit TOD timestamp
+ *
+ * Returns: true if a is later than b
+ */
+static inline int tod_after_eq(unsigned long long a, unsigned long long b)
+{
+ if (MACHINE_HAS_SCC)
+ return (long long) a >= (long long) b;
+ return a >= b;
+}
+
#endif
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 2eb8ff0d6fca..3a14b864b2e3 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -135,7 +135,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long address)
{
- if (tlb->mm->context.asce_limit <= (1UL << 31))
+ if (tlb->mm->context.asce_limit <= _REGION3_SIZE)
return;
pgtable_pmd_page_dtor(virt_to_page(pmd));
tlb_remove_table(tlb, pmd);
@@ -151,7 +151,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long address)
{
- if (tlb->mm->context.asce_limit <= (1UL << 53))
+ if (tlb->mm->context.asce_limit <= _REGION1_SIZE)
return;
tlb_remove_table(tlb, p4d);
}
@@ -166,7 +166,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
unsigned long address)
{
- if (tlb->mm->context.asce_limit <= (1UL << 42))
+ if (tlb->mm->context.asce_limit <= _REGION2_SIZE)
return;
tlb_remove_table(tlb, pud);
}
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 39846100682a..4d759f8f4bc7 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -20,10 +20,15 @@ static inline void __tlb_flush_local(void)
*/
static inline void __tlb_flush_idte(unsigned long asce)
{
+ unsigned long opt;
+
+ opt = IDTE_PTOA;
+ if (MACHINE_HAS_TLB_GUEST)
+ opt |= IDTE_GUEST_ASCE;
/* Global TLB flush for the mm */
asm volatile(
" .insn rrf,0xb98e0000,0,%0,%1,0"
- : : "a" (2048), "a" (asce) : "cc");
+ : : "a" (opt), "a" (asce) : "cc");
}
#ifdef CONFIG_SMP
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index fa1bfce10370..5222da162b69 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -77,12 +77,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
return &node_to_cpumask_map[node];
}
-/*
- * Returns the number of the node containing node 'node'. This
- * architecture is flat, so it is a pretty simple function!
- */
-#define parent_node(node) (node)
-
#define pcibus_to_node(bus) __pcibus_to_node(bus)
#define node_distance(a, b) __node_distance(a, b)
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
deleted file mode 100644
index 6740f4f9781f..000000000000
--- a/arch/s390/include/asm/types.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/types.h"
- */
-#ifndef _S390_TYPES_H
-#define _S390_TYPES_H
-
-#include <uapi/asm/types.h>
-
-#endif /* _S390_TYPES_H */
diff --git a/arch/s390/include/asm/unaligned.h b/arch/s390/include/asm/unaligned.h
deleted file mode 100644
index da9627afe5d8..000000000000
--- a/arch/s390/include/asm/unaligned.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _ASM_S390_UNALIGNED_H
-#define _ASM_S390_UNALIGNED_H
-
-/*
- * The S390 can do unaligned accesses itself.
- */
-#include <linux/unaligned/access_ok.h>
-#include <linux/unaligned/generic.h>
-
-#define get_unaligned __get_unaligned_be
-#define put_unaligned __put_unaligned_be
-
-#endif /* _ASM_S390_UNALIGNED_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index ca62066895e0..098f28778a13 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -9,4 +9,5 @@ generic-y += param.h
generic-y += poll.h
generic-y += resource.h
generic-y += sockios.h
+generic-y += swab.h
generic-y += termbits.h
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 1340311dab77..ab5797cdc1b7 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -72,7 +72,10 @@ typedef struct dasd_information2_t {
* 0x02: use diag discipline (diag)
* 0x04: set the device initially online (internal use only)
* 0x08: enable ERP related logging
- * 0x20: give access to raw eckd data
+ * 0x10: allow I/O to fail on lost paths
+ * 0x20: allow I/O to fail when a lock was stolen
+ * 0x40: give access to raw eckd data
+ * 0x80: enable discard support
*/
#define DASD_FEATURE_DEFAULT 0x00
#define DASD_FEATURE_READONLY 0x01
@@ -82,6 +85,7 @@ typedef struct dasd_information2_t {
#define DASD_FEATURE_FAILFAST 0x10
#define DASD_FEATURE_FAILONSLCK 0x20
#define DASD_FEATURE_USERAW 0x40
+#define DASD_FEATURE_DISCARD 0x80
#define DASD_PARTN_BITS 2
diff --git a/arch/s390/include/uapi/asm/swab.h b/arch/s390/include/uapi/asm/swab.h
deleted file mode 100644
index da3bfe5cc161..000000000000
--- a/arch/s390/include/uapi/asm/swab.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifndef _S390_SWAB_H
-#define _S390_SWAB_H
-
-/*
- * S390 version
- * Copyright IBM Corp. 1999
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-
-#include <linux/types.h>
-
-#ifndef __s390x__
-# define __SWAB_64_THRU_32__
-#endif
-
-#ifdef __s390x__
-static inline __u64 __arch_swab64p(const __u64 *x)
-{
- __u64 result;
-
- asm volatile("lrvg %0,%1" : "=d" (result) : "m" (*x));
- return result;
-}
-#define __arch_swab64p __arch_swab64p
-
-static inline __u64 __arch_swab64(__u64 x)
-{
- __u64 result;
-
- asm volatile("lrvgr %0,%1" : "=d" (result) : "d" (x));
- return result;
-}
-#define __arch_swab64 __arch_swab64
-
-static inline void __arch_swab64s(__u64 *x)
-{
- *x = __arch_swab64p(x);
-}
-#define __arch_swab64s __arch_swab64s
-#endif /* __s390x__ */
-
-static inline __u32 __arch_swab32p(const __u32 *x)
-{
- __u32 result;
-
- asm volatile(
-#ifndef __s390x__
- " icm %0,8,%O1+3(%R1)\n"
- " icm %0,4,%O1+2(%R1)\n"
- " icm %0,2,%O1+1(%R1)\n"
- " ic %0,%1"
- : "=&d" (result) : "Q" (*x) : "cc");
-#else /* __s390x__ */
- " lrv %0,%1"
- : "=d" (result) : "m" (*x));
-#endif /* __s390x__ */
- return result;
-}
-#define __arch_swab32p __arch_swab32p
-
-#ifdef __s390x__
-static inline __u32 __arch_swab32(__u32 x)
-{
- __u32 result;
-
- asm volatile("lrvr %0,%1" : "=d" (result) : "d" (x));
- return result;
-}
-#define __arch_swab32 __arch_swab32
-#endif /* __s390x__ */
-
-static inline __u16 __arch_swab16p(const __u16 *x)
-{
- __u16 result;
-
- asm volatile(
-#ifndef __s390x__
- " icm %0,2,%O1+1(%R1)\n"
- " ic %0,%1\n"
- : "=&d" (result) : "Q" (*x) : "cc");
-#else /* __s390x__ */
- " lrvh %0,%1"
- : "=d" (result) : "m" (*x));
-#endif /* __s390x__ */
- return result;
-}
-#define __arch_swab16p __arch_swab16p
-
-#endif /* _S390_SWAB_H */
diff --git a/arch/s390/include/uapi/asm/vmcp.h b/arch/s390/include/uapi/asm/vmcp.h
new file mode 100644
index 000000000000..4caf71714a55
--- /dev/null
+++ b/arch/s390/include/uapi/asm/vmcp.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright IBM Corp. 2004, 2005
+ * Interface implementation for communication with the z/VM control program
+ * Version 1.0
+ * Author(s): Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ *
+ * z/VMs CP offers the possibility to issue commands via the diagnose code 8
+ * this driver implements a character device that issues these commands and
+ * returns the answer of CP.
+ *
+ * The idea of this driver is based on cpint from Neale Ferguson
+ */
+
+#ifndef _UAPI_ASM_VMCP_H
+#define _UAPI_ASM_VMCP_H
+
+#include <linux/ioctl.h>
+
+#define VMCP_GETCODE _IOR(0x10, 1, int)
+#define VMCP_SETBUF _IOW(0x10, 2, int)
+#define VMCP_GETSIZE _IOR(0x10, 3, int)
+
+#endif /* _UAPI_ASM_VMCP_H */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index b65c414b6c0e..3d42f91c95fd 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -158,6 +158,7 @@ int main(void)
OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock);
+ OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock);
OFFSET(__LC_CURRENT, lowcore, current_task);
OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 9f0e4a2785f7..63bc6603e0ed 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/mm.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/cpcmd.h>
@@ -28,9 +29,7 @@ static int diag8_noresponse(int cmdlen)
register unsigned long reg3 asm ("3") = cmdlen;
asm volatile(
- " sam31\n"
" diag %1,%0,0x8\n"
- " sam64\n"
: "+d" (reg3) : "d" (reg2) : "cc");
return reg3;
}
@@ -43,9 +42,7 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
register unsigned long reg5 asm ("5") = *rlen;
asm volatile(
- " sam31\n"
" diag %2,%0,0x8\n"
- " sam64\n"
" brc 8,1f\n"
" agr %1,%4\n"
"1:\n"
@@ -57,7 +54,6 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
/*
* __cpcmd has some restrictions over cpcmd
- * - the response buffer must reside below 2GB (if any)
* - __cpcmd is unlocked and therefore not SMP-safe
*/
int __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
@@ -88,13 +84,12 @@ EXPORT_SYMBOL(__cpcmd);
int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
{
+ unsigned long flags;
char *lowbuf;
int len;
- unsigned long flags;
- if ((virt_to_phys(response) != (unsigned long) response) ||
- (((unsigned long)response + rlen) >> 31)) {
- lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA);
+ if (is_vmalloc_or_module_addr(response)) {
+ lowbuf = kmalloc(rlen, GFP_KERNEL);
if (!lowbuf) {
pr_warn("The cpcmd kernel function failed to allocate a response buffer\n");
return -ENOMEM;
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 86b3e74f569e..1d9e83c401fc 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -866,7 +866,8 @@ static inline void
debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level,
int exception)
{
- active->id.stck = get_tod_clock_fast() - sched_clock_base_cc;
+ active->id.stck = get_tod_clock_fast() -
+ *(unsigned long long *) &tod_clock_base[1];
active->id.fields.cpuid = smp_processor_id();
active->caller = __builtin_return_address(0);
active->id.fields.exception = exception;
@@ -1455,15 +1456,15 @@ int
debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
int area, debug_entry_t * entry, char *out_buf)
{
- unsigned long sec, usec;
+ unsigned long base, sec, usec;
char *except_str;
unsigned long caller;
int rc = 0;
unsigned int level;
level = entry->id.fields.level;
- sec = (entry->id.stck >> 12) + (sched_clock_base_cc >> 12);
- sec = sec - (TOD_UNIX_EPOCH >> 12);
+ base = (*(unsigned long *) &tod_clock_base[0]) >> 4;
+ sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12);
usec = do_div(sec, USEC_PER_SEC);
if (entry->id.fields.exception)
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index dab78babfab6..2aa545dca4d5 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -76,7 +76,7 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
#ifdef CONFIG_CHECK_STACK
sp = __dump_trace(func, data, sp,
- S390_lowcore.panic_stack + frame_size - 4096,
+ S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
S390_lowcore.panic_stack + frame_size);
#endif
sp = __dump_trace(func, data, sp,
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 5d20182ee8ae..ca8cd80e8feb 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -53,8 +53,9 @@ static void __init reset_tod_clock(void)
if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
disabled_wait(0);
- sched_clock_base_cc = TOD_UNIX_EPOCH;
- S390_lowcore.last_update_clock = sched_clock_base_cc;
+ memset(tod_clock_base, 0, 16);
+ *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH;
+ S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
}
#ifdef CONFIG_SHARED_KERNEL
@@ -165,8 +166,8 @@ static noinline __init void create_kernel_nss(void)
}
/* re-initialize cputime accounting. */
- sched_clock_base_cc = get_tod_clock();
- S390_lowcore.last_update_clock = sched_clock_base_cc;
+ get_tod_clock_ext(tod_clock_base);
+ S390_lowcore.last_update_clock = *(__u64 *) &tod_clock_base[1];
S390_lowcore.last_update_timer = 0x7fffffffffffffffULL;
S390_lowcore.user_timer = 0;
S390_lowcore.system_timer = 0;
@@ -387,6 +388,12 @@ static __init void detect_machine_facilities(void)
}
if (test_facility(133))
S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
+ if (test_facility(139) && (tod_clock_base[1] & 0x80)) {
+ /* Enabled signed clock comparator comparisons */
+ S390_lowcore.machine_flags |= MACHINE_FLAG_SCC;
+ clock_comparator_max = -1ULL >> 1;
+ __ctl_set_bit(0, 53);
+ }
}
static inline void save_vector_registers(void)
@@ -413,7 +420,7 @@ static int __init disable_vector_extension(char *str)
{
S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
__ctl_clear_bit(0, 17);
- return 1;
+ return 0;
}
early_param("novx", disable_vector_extension);
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index eff5b31671d4..8ed753c72d9b 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -302,7 +302,8 @@ ENTRY(startup_kdump)
xc 0xe00(256),0xe00
xc 0xf00(256),0xf00
lctlg %c0,%c15,0x200(%r0) # initialize control registers
- stck __LC_LAST_UPDATE_CLOCK
+ stcke __LC_BOOT_CLOCK
+ mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
spt 6f-.LPG0(%r13)
mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
l %r15,.Lstack-.LPG0(%r13)
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 31c91f24e562..0d8f2a858ced 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -21,8 +21,8 @@ ENTRY(startup_continue)
xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid
mvi __LC_LPP,0x80 # and set LPP_MAGIC
.insn s,0xb2800000,__LC_LPP # load program parameter
-0: larl %r1,sched_clock_base_cc
- mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK
+0: larl %r1,tod_clock_base
+ mvc 0(16,%r1),__LC_BOOT_CLOCK
larl %r13,.LPG1 # get base
lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 6dca93b29bed..a2fdff0e730b 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -105,7 +105,8 @@ void do_IRQ(struct pt_regs *regs, int irq)
old_regs = set_irq_regs(regs);
irq_enter();
- if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+ if (tod_after_eq(S390_lowcore.int_clock,
+ S390_lowcore.clock_comparator))
/* Serve timer interrupts first. */
clock_comparator_work();
generic_handle_irq(irq);
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index cfac28330b03..4bdc65636603 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -7,6 +7,7 @@
*/
#include <linux/linkage.h>
+#include <asm/page.h>
#include <asm/sigp.h>
/*
@@ -55,8 +56,8 @@ ENTRY(relocate_kernel)
.back_pgm:
lmg %r0,%r15,gprregs-.base(%r13)
.top:
- lghi %r7,4096 # load PAGE_SIZE in r7
- lghi %r9,4096 # load PAGE_SIZE in r9
+ lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7
+ lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9
lg %r5,0(%r2) # read another word for indirection page
aghi %r2,8 # increment pointer
tml %r5,0x1 # is it a destination page?
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 3d1d808ea8a9..164a1e16b53e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -305,7 +305,7 @@ static void __init setup_lowcore(void)
/*
* Setup lowcore for boot cpu
*/
- BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096);
+ BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc));
lc->restart_psw.mask = PSW_KERNEL_BITS;
lc->restart_psw.addr = (unsigned long) restart_int_handler;
@@ -323,7 +323,7 @@ static void __init setup_lowcore(void)
lc->io_new_psw.mask = PSW_KERNEL_BITS |
PSW_MASK_DAT | PSW_MASK_MCHECK;
lc->io_new_psw.addr = (unsigned long) io_int_handler;
- lc->clock_comparator = -1ULL;
+ lc->clock_comparator = clock_comparator_max;
lc->kernel_stack = ((unsigned long) &init_thread_union)
+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->async_stack = (unsigned long)
@@ -469,10 +469,10 @@ static void __init setup_memory_end(void)
vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
- if (tmp + vmalloc_size + MODULES_LEN <= (1UL << 42))
- vmax = 1UL << 42; /* 3-level kernel page table */
+ if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
+ vmax = _REGION2_SIZE; /* 3-level kernel page table */
else
- vmax = 1UL << 53; /* 4-level kernel page table */
+ vmax = _REGION1_SIZE; /* 4-level kernel page table */
/* module area is at the end of the kernel address space. */
MODULES_END = vmax;
MODULES_VADDR = MODULES_END - MODULES_LEN;
@@ -818,6 +818,9 @@ static int __init setup_hwcaps(void)
case 0x2965:
strcpy(elf_platform, "z13");
break;
+ case 0x3906:
+ strcpy(elf_platform, "z14");
+ break;
}
/*
@@ -922,6 +925,7 @@ void __init setup_arch(char **cmdline_p)
setup_memory_end();
setup_memory();
dma_contiguous_reserve(memory_end);
+ vmcp_cma_reserve();
check_initrd();
reserve_crashkernel();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 1020a11a24e5..1cee6753d47a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -1181,6 +1181,7 @@ static int __init s390_smp_init(void)
rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
smp_cpu_online, smp_cpu_pre_down);
+ rc = rc <= 0 ? rc : 0;
out:
return rc;
}
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index 39e2f41b6cf0..c8ea715bfe10 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -98,10 +98,16 @@ int page_key_alloc(unsigned long pages)
*/
void page_key_read(unsigned long *pfn)
{
+ struct page *page;
unsigned long addr;
-
- addr = (unsigned long) page_address(pfn_to_page(*pfn));
- *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr);
+ unsigned char key;
+
+ page = pfn_to_page(*pfn);
+ addr = (unsigned long) page_address(page);
+ key = (unsigned char) page_get_storage_key(addr) & 0x7f;
+ if (arch_test_page_nodat(page))
+ key |= 0x80;
+ *(unsigned char *) pfn = key;
}
/*
@@ -126,8 +132,16 @@ void page_key_memorize(unsigned long *pfn)
*/
void page_key_write(void *address)
{
- page_set_storage_key((unsigned long) address,
- page_key_rp->data[page_key_rx], 0);
+ struct page *page;
+ unsigned char key;
+
+ key = page_key_rp->data[page_key_rx];
+ page_set_storage_key((unsigned long) address, key & 0x7f, 0);
+ page = virt_to_page(address);
+ if (key & 0x80)
+ arch_set_page_nodat(page, 0);
+ else
+ arch_set_page_dat(page, 0);
if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
return;
page_key_rp = page_key_rp->next;
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 192efdfac918..5cbd52169348 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -51,8 +51,15 @@
#include <asm/cio.h>
#include "entry.h"
-u64 sched_clock_base_cc = -1; /* Force to data section. */
-EXPORT_SYMBOL_GPL(sched_clock_base_cc);
+unsigned char tod_clock_base[16] __aligned(8) = {
+ /* Force to data section. */
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+EXPORT_SYMBOL_GPL(tod_clock_base);
+
+u64 clock_comparator_max = -1ULL;
+EXPORT_SYMBOL_GPL(clock_comparator_max);
static DEFINE_PER_CPU(struct clock_event_device, comparators);
@@ -75,7 +82,7 @@ void __init time_early_init(void)
struct ptff_qui qui;
/* Initialize TOD steering parameters */
- tod_steering_end = sched_clock_base_cc;
+ tod_steering_end = *(unsigned long long *) &tod_clock_base[1];
vdso_data->ts_end = tod_steering_end;
if (!test_facility(28))
@@ -111,22 +118,27 @@ unsigned long long monotonic_clock(void)
}
EXPORT_SYMBOL(monotonic_clock);
-static void tod_to_timeval(__u64 todval, struct timespec64 *xt)
+static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt)
{
- unsigned long long sec;
+ unsigned long long high, low, rem, sec, nsec;
+
+ /* Split extendnd TOD clock to micro-seconds and sub-micro-seconds */
+ high = (*(unsigned long long *) clk) >> 4;
+ low = (*(unsigned long long *)&clk[7]) << 4;
+ /* Calculate seconds and nano-seconds */
+ sec = high;
+ rem = do_div(sec, 1000000);
+ nsec = (((low >> 32) + (rem << 32)) * 1000) >> 32;
- sec = todval >> 12;
- do_div(sec, 1000000);
xt->tv_sec = sec;
- todval -= (sec * 1000000) << 12;
- xt->tv_nsec = ((todval * 1000) >> 12);
+ xt->tv_nsec = nsec;
}
void clock_comparator_work(void)
{
struct clock_event_device *cd;
- S390_lowcore.clock_comparator = -1ULL;
+ S390_lowcore.clock_comparator = clock_comparator_max;
cd = this_cpu_ptr(&comparators);
cd->event_handler(cd);
}
@@ -148,7 +160,7 @@ void init_cpu_timer(void)
struct clock_event_device *cd;
int cpu;
- S390_lowcore.clock_comparator = -1ULL;
+ S390_lowcore.clock_comparator = clock_comparator_max;
set_clock_comparator(S390_lowcore.clock_comparator);
cpu = smp_processor_id();
@@ -179,7 +191,7 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
unsigned long param64)
{
inc_irq_stat(IRQEXT_CLK);
- if (S390_lowcore.clock_comparator == -1ULL)
+ if (S390_lowcore.clock_comparator == clock_comparator_max)
set_clock_comparator(S390_lowcore.clock_comparator);
}
@@ -197,18 +209,28 @@ static void stp_reset(void);
void read_persistent_clock64(struct timespec64 *ts)
{
- __u64 clock;
+ unsigned char clk[STORE_CLOCK_EXT_SIZE];
+ __u64 delta;
- clock = get_tod_clock() - initial_leap_seconds;
- tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
+ delta = initial_leap_seconds + TOD_UNIX_EPOCH;
+ get_tod_clock_ext(clk);
+ *(__u64 *) &clk[1] -= delta;
+ if (*(__u64 *) &clk[1] > delta)
+ clk[0]--;
+ ext_to_timespec64(clk, ts);
}
void read_boot_clock64(struct timespec64 *ts)
{
- __u64 clock;
+ unsigned char clk[STORE_CLOCK_EXT_SIZE];
+ __u64 delta;
- clock = sched_clock_base_cc - initial_leap_seconds;
- tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
+ delta = initial_leap_seconds + TOD_UNIX_EPOCH;
+ memcpy(clk, tod_clock_base, 16);
+ *(__u64 *) &clk[1] -= delta;
+ if (*(__u64 *) &clk[1] > delta)
+ clk[0]--;
+ ext_to_timespec64(clk, ts);
}
static u64 read_tod_clock(struct clocksource *cs)
@@ -335,7 +357,7 @@ static unsigned long clock_sync_flags;
* source. If the clock mode is local it will return -EOPNOTSUPP and
* -EAGAIN if the clock is not in sync with the external reference.
*/
-int get_phys_clock(unsigned long long *clock)
+int get_phys_clock(unsigned long *clock)
{
atomic_t *sw_ptr;
unsigned int sw0, sw1;
@@ -406,7 +428,10 @@ static void clock_sync_global(unsigned long long delta)
struct ptff_qto qto;
/* Fixup the monotonic sched clock. */
- sched_clock_base_cc += delta;
+ *(unsigned long long *) &tod_clock_base[1] += delta;
+ if (*(unsigned long long *) &tod_clock_base[1] < delta)
+ /* Epoch overflow */
+ tod_clock_base[0]++;
/* Adjust TOD steering parameters. */
vdso_data->tb_update_count++;
now = get_tod_clock();
@@ -437,7 +462,7 @@ static void clock_sync_global(unsigned long long delta)
static void clock_sync_local(unsigned long long delta)
{
/* Add the delta to the clock comparator. */
- if (S390_lowcore.clock_comparator != -1ULL) {
+ if (S390_lowcore.clock_comparator != clock_comparator_max) {
S390_lowcore.clock_comparator += delta;
set_clock_comparator(S390_lowcore.clock_comparator);
}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index b89d19f6f2ab..eacda05b45d7 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -157,6 +157,8 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore)
page_frame = get_zeroed_page(GFP_KERNEL);
if (!segment_table || !page_table || !page_frame)
goto out;
+ arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER);
+ arch_set_page_dat(virt_to_page(page_table), 0);
/* Initialize per-cpu vdso data page */
vd = (struct vdso_per_cpu_data *) page_frame;
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
index 8f048c2d6d13..263a7f9eee1e 100644
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -2,6 +2,8 @@
* This is the infamous ld script for the 32 bits vdso
* library
*/
+
+#include <asm/page.h>
#include <asm/vdso.h>
OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
@@ -91,7 +93,7 @@ SECTIONS
.debug_ranges 0 : { *(.debug_ranges) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
PROVIDE(_vdso_data = .);
/DISCARD/ : {
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index f35455d497fe..9e3dbbcc1cfc 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -2,6 +2,8 @@
* This is the infamous ld script for the 64 bits vdso
* library
*/
+
+#include <asm/page.h>
#include <asm/vdso.h>
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
@@ -91,7 +93,7 @@ SECTIONS
.debug_ranges 0 : { *(.debug_ranges) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
PROVIDE(_vdso_data = .);
/DISCARD/ : {
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index ce865bd4f81d..e4d36094aceb 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -27,7 +27,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
unsigned long prefix = kvm_s390_get_prefix(vcpu);
start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
- end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
+ end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE;
vcpu->stat.diagnose_10++;
if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
@@ -51,9 +51,9 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
*/
gmap_discard(vcpu->arch.gmap, start, prefix);
if (start <= prefix)
- gmap_discard(vcpu->arch.gmap, 0, 4096);
- if (end > prefix + 4096)
- gmap_discard(vcpu->arch.gmap, 4096, 8192);
+ gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+ if (end > prefix + PAGE_SIZE)
+ gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
}
return 0;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 653cae5e1ee1..3cc77391a102 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -629,7 +629,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
if (asce.r)
goto real_address;
- ptr = asce.origin * 4096;
+ ptr = asce.origin * PAGE_SIZE;
switch (asce.dt) {
case ASCE_TYPE_REGION1:
if (vaddr.rfx01 > asce.tl)
@@ -674,7 +674,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_REGION_SECOND_TRANS;
if (edat1)
dat_protection |= rfte.p;
- ptr = rfte.rto * 4096 + vaddr.rsx * 8;
+ ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
}
/* fallthrough */
case ASCE_TYPE_REGION2: {
@@ -692,7 +692,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_REGION_THIRD_TRANS;
if (edat1)
dat_protection |= rste.p;
- ptr = rste.rto * 4096 + vaddr.rtx * 8;
+ ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
}
/* fallthrough */
case ASCE_TYPE_REGION3: {
@@ -720,7 +720,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_SEGMENT_TRANSLATION;
if (edat1)
dat_protection |= rtte.fc0.p;
- ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8;
+ ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
}
/* fallthrough */
case ASCE_TYPE_SEGMENT: {
@@ -743,7 +743,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
goto absolute_address;
}
dat_protection |= ste.fc0.p;
- ptr = ste.fc0.pto * 2048 + vaddr.px * 8;
+ ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
}
}
if (kvm_is_error_gpa(vcpu->kvm, ptr))
@@ -993,7 +993,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
parent = sg->parent;
vaddr.addr = saddr;
asce.val = sg->orig_asce;
- ptr = asce.origin * 4096;
+ ptr = asce.origin * PAGE_SIZE;
if (asce.r) {
*fake = 1;
ptr = 0;
@@ -1029,7 +1029,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
union region1_table_entry rfte;
if (*fake) {
- ptr += (unsigned long) vaddr.rfx << 53;
+ ptr += vaddr.rfx * _REGION1_SIZE;
rfte.val = ptr;
goto shadow_r2t;
}
@@ -1044,7 +1044,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
return PGM_REGION_SECOND_TRANS;
if (sg->edat_level >= 1)
*dat_protection |= rfte.p;
- ptr = rfte.rto << 12UL;
+ ptr = rfte.rto * PAGE_SIZE;
shadow_r2t:
rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
if (rc)
@@ -1055,7 +1055,7 @@ shadow_r2t:
union region2_table_entry rste;
if (*fake) {
- ptr += (unsigned long) vaddr.rsx << 42;
+ ptr += vaddr.rsx * _REGION2_SIZE;
rste.val = ptr;
goto shadow_r3t;
}
@@ -1070,7 +1070,7 @@ shadow_r2t:
return PGM_REGION_THIRD_TRANS;
if (sg->edat_level >= 1)
*dat_protection |= rste.p;
- ptr = rste.rto << 12UL;
+ ptr = rste.rto * PAGE_SIZE;
shadow_r3t:
rste.p |= *dat_protection;
rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
@@ -1082,7 +1082,7 @@ shadow_r3t:
union region3_table_entry rtte;
if (*fake) {
- ptr += (unsigned long) vaddr.rtx << 31;
+ ptr += vaddr.rtx * _REGION3_SIZE;
rtte.val = ptr;
goto shadow_sgt;
}
@@ -1098,7 +1098,7 @@ shadow_r3t:
if (rtte.fc && sg->edat_level >= 2) {
*dat_protection |= rtte.fc0.p;
*fake = 1;
- ptr = rtte.fc1.rfaa << 31UL;
+ ptr = rtte.fc1.rfaa * _REGION3_SIZE;
rtte.val = ptr;
goto shadow_sgt;
}
@@ -1106,7 +1106,7 @@ shadow_r3t:
return PGM_SEGMENT_TRANSLATION;
if (sg->edat_level >= 1)
*dat_protection |= rtte.fc0.p;
- ptr = rtte.fc0.sto << 12UL;
+ ptr = rtte.fc0.sto * PAGE_SIZE;
shadow_sgt:
rtte.fc0.p |= *dat_protection;
rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
@@ -1118,7 +1118,7 @@ shadow_sgt:
union segment_table_entry ste;
if (*fake) {
- ptr += (unsigned long) vaddr.sx << 20;
+ ptr += vaddr.sx * _SEGMENT_SIZE;
ste.val = ptr;
goto shadow_pgt;
}
@@ -1134,11 +1134,11 @@ shadow_sgt:
*dat_protection |= ste.fc0.p;
if (ste.fc && sg->edat_level >= 1) {
*fake = 1;
- ptr = ste.fc1.sfaa << 20UL;
+ ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
ste.val = ptr;
goto shadow_pgt;
}
- ptr = ste.fc0.pto << 11UL;
+ ptr = ste.fc0.pto * (PAGE_SIZE / 2);
shadow_pgt:
ste.fc0.p |= *dat_protection;
rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
@@ -1187,8 +1187,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
vaddr.addr = saddr;
if (fake) {
- /* offset in 1MB guest memory block */
- pte.val = pgt + ((unsigned long) vaddr.px << 12UL);
+ pte.val = pgt + vaddr.px * PAGE_SIZE;
goto shadow_page;
}
if (!rc)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 8a1dac793d6b..785ad028bde6 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -329,7 +329,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
start = kvm_s390_logical_to_effective(vcpu, start);
if (m3 & SSKE_MB) {
/* start already designates an absolute address */
- end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+ end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
} else {
start = kvm_s390_real_to_abs(vcpu, start);
end = start + PAGE_SIZE;
@@ -893,10 +893,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
case 0x00000000:
/* only 4k frames specify a real address */
start = kvm_s390_real_to_abs(vcpu, start);
- end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
+ end = (start + PAGE_SIZE) & ~(PAGE_SIZE - 1);
break;
case 0x00001000:
- end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+ end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
break;
case 0x00002000:
/* only support 2G frame size if EDAT2 is available and we are
@@ -904,7 +904,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
if (!test_kvm_facility(vcpu->kvm, 78) ||
psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
+ end = (start + _REGION3_SIZE) & ~(_REGION3_SIZE - 1);
break;
default:
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 715c19c45d9a..ba8203e4d516 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1069,7 +1069,7 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- BUILD_BUG_ON(sizeof(struct vsie_page) != 4096);
+ BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);
/* 512 byte alignment */
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 92e90e40b6fb..7f17555ad4d5 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -57,7 +57,7 @@ static void __udelay_enabled(unsigned long long usecs)
end = get_tod_clock_fast() + (usecs << 12);
do {
clock_saved = 0;
- if (end < S390_lowcore.clock_comparator) {
+ if (tod_after(S390_lowcore.clock_comparator, end)) {
clock_saved = local_tick_disable();
set_clock_comparator(end);
}
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index ffb15bd4c593..b12663d653d8 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -32,42 +32,63 @@ static int __init spin_retry_setup(char *str)
}
__setup("spin_retry=", spin_retry_setup);
+static inline int arch_load_niai4(int *lock)
+{
+ int owner;
+
+ asm volatile(
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ " .long 0xb2fa0040\n" /* NIAI 4 */
+#endif
+ " l %0,%1\n"
+ : "=d" (owner) : "Q" (*lock) : "memory");
+ return owner;
+}
+
+static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
+{
+ int expected = old;
+
+ asm volatile(
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ " .long 0xb2fa0080\n" /* NIAI 8 */
+#endif
+ " cs %0,%3,%1\n"
+ : "=d" (old), "=Q" (*lock)
+ : "0" (old), "d" (new), "Q" (*lock)
+ : "cc", "memory");
+ return expected == old;
+}
+
void arch_spin_lock_wait(arch_spinlock_t *lp)
{
int cpu = SPINLOCK_LOCKVAL;
- int owner, count, first_diag;
+ int owner, count;
+
+ /* Pass the virtual CPU to the lock holder if it is not running */
+ owner = arch_load_niai4(&lp->lock);
+ if (owner && arch_vcpu_is_preempted(~owner))
+ smp_yield_cpu(~owner);
- first_diag = 1;
+ count = spin_retry;
while (1) {
- owner = ACCESS_ONCE(lp->lock);
+ owner = arch_load_niai4(&lp->lock);
/* Try to get the lock if it is free. */
if (!owner) {
- if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
+ if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
return;
continue;
}
- /* First iteration: check if the lock owner is running. */
- if (first_diag && arch_vcpu_is_preempted(~owner)) {
- smp_yield_cpu(~owner);
- first_diag = 0;
+ if (count-- >= 0)
continue;
- }
- /* Loop for a while on the lock value. */
count = spin_retry;
- do {
- owner = ACCESS_ONCE(lp->lock);
- } while (owner && count-- > 0);
- if (!owner)
- continue;
/*
* For multiple layers of hypervisors, e.g. z/VM + LPAR
* yield the CPU unconditionally. For LPAR rely on the
* sense running status.
*/
- if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) {
+ if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner))
smp_yield_cpu(~owner);
- first_diag = 0;
- }
}
}
EXPORT_SYMBOL(arch_spin_lock_wait);
@@ -75,42 +96,36 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
{
int cpu = SPINLOCK_LOCKVAL;
- int owner, count, first_diag;
+ int owner, count;
local_irq_restore(flags);
- first_diag = 1;
+
+ /* Pass the virtual CPU to the lock holder if it is not running */
+ owner = arch_load_niai4(&lp->lock);
+ if (owner && arch_vcpu_is_preempted(~owner))
+ smp_yield_cpu(~owner);
+
+ count = spin_retry;
while (1) {
- owner = ACCESS_ONCE(lp->lock);
+ owner = arch_load_niai4(&lp->lock);
/* Try to get the lock if it is free. */
if (!owner) {
local_irq_disable();
- if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
+ if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
return;
local_irq_restore(flags);
continue;
}
- /* Check if the lock owner is running. */
- if (first_diag && arch_vcpu_is_preempted(~owner)) {
- smp_yield_cpu(~owner);
- first_diag = 0;
+ if (count-- >= 0)
continue;
- }
- /* Loop for a while on the lock value. */
count = spin_retry;
- do {
- owner = ACCESS_ONCE(lp->lock);
- } while (owner && count-- > 0);
- if (!owner)
- continue;
/*
* For multiple layers of hypervisors, e.g. z/VM + LPAR
* yield the CPU unconditionally. For LPAR rely on the
* sense running status.
*/
- if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) {
+ if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner))
smp_yield_cpu(~owner);
- first_diag = 0;
- }
}
}
EXPORT_SYMBOL(arch_spin_lock_wait_flags);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index b3bd3f23b8e8..4ea9106417ee 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -15,8 +15,30 @@
#include <asm/mmu_context.h>
#include <asm/facility.h>
+#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
static DEFINE_STATIC_KEY_FALSE(have_mvcos);
+static int __init uaccess_init(void)
+{
+ if (test_facility(27))
+ static_branch_enable(&have_mvcos);
+ return 0;
+}
+early_initcall(uaccess_init);
+
+static inline int copy_with_mvcos(void)
+{
+ if (static_branch_likely(&have_mvcos))
+ return 1;
+ return 0;
+}
+#else
+static inline int copy_with_mvcos(void)
+{
+ return 1;
+}
+#endif
+
static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
unsigned long size)
{
@@ -84,7 +106,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
- if (static_branch_likely(&have_mvcos))
+ if (copy_with_mvcos())
return copy_from_user_mvcos(to, from, n);
return copy_from_user_mvcp(to, from, n);
}
@@ -157,7 +179,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
{
- if (static_branch_likely(&have_mvcos))
+ if (copy_with_mvcos())
return copy_to_user_mvcos(to, from, n);
return copy_to_user_mvcs(to, from, n);
}
@@ -220,7 +242,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user
unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
- if (static_branch_likely(&have_mvcos))
+ if (copy_with_mvcos())
return copy_in_user_mvcos(to, from, n);
return copy_in_user_mvc(to, from, n);
}
@@ -292,7 +314,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
unsigned long __clear_user(void __user *to, unsigned long size)
{
- if (static_branch_likely(&have_mvcos))
+ if (copy_with_mvcos())
return clear_user_mvcos(to, size);
return clear_user_xc(to, size);
}
@@ -349,11 +371,3 @@ long __strncpy_from_user(char *dst, const char __user *src, long size)
return done;
}
EXPORT_SYMBOL(__strncpy_from_user);
-
-static int __init uaccess_init(void)
-{
- if (test_facility(27))
- static_branch_enable(&have_mvcos);
- return 0;
-}
-early_initcall(uaccess_init);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 14f25798b001..bdabb013537b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -135,7 +135,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
pr_alert("AS:%016lx ", asce);
switch (asce & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
- table = table + ((address >> 53) & 0x7ff);
+ table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("R1:%016lx ", *table);
@@ -144,7 +144,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_REGION2:
- table = table + ((address >> 42) & 0x7ff);
+ table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("R2:%016lx ", *table);
@@ -153,7 +153,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_REGION3:
- table = table + ((address >> 31) & 0x7ff);
+ table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("R3:%016lx ", *table);
@@ -162,7 +162,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_SEGMENT:
- table = table + ((address >> 20) & 0x7ff);
+ table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("S:%016lx ", *table);
@@ -170,7 +170,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
goto out;
table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
}
- table = table + ((address >> 12) & 0xff);
+ table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("P:%016lx ", *table);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 4fb3d3cdb370..9e1494e3d849 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -36,16 +36,16 @@ static struct gmap *gmap_alloc(unsigned long limit)
unsigned long *table;
unsigned long etype, atype;
- if (limit < (1UL << 31)) {
- limit = (1UL << 31) - 1;
+ if (limit < _REGION3_SIZE) {
+ limit = _REGION3_SIZE - 1;
atype = _ASCE_TYPE_SEGMENT;
etype = _SEGMENT_ENTRY_EMPTY;
- } else if (limit < (1UL << 42)) {
- limit = (1UL << 42) - 1;
+ } else if (limit < _REGION2_SIZE) {
+ limit = _REGION2_SIZE - 1;
atype = _ASCE_TYPE_REGION3;
etype = _REGION3_ENTRY_EMPTY;
- } else if (limit < (1UL << 53)) {
- limit = (1UL << 53) - 1;
+ } else if (limit < _REGION1_SIZE) {
+ limit = _REGION1_SIZE - 1;
atype = _ASCE_TYPE_REGION2;
etype = _REGION2_ENTRY_EMPTY;
} else {
@@ -65,7 +65,7 @@ static struct gmap *gmap_alloc(unsigned long limit)
spin_lock_init(&gmap->guest_table_lock);
spin_lock_init(&gmap->shadow_lock);
atomic_set(&gmap->ref_count, 1);
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
goto out_free;
page->index = 0;
@@ -186,7 +186,7 @@ static void gmap_free(struct gmap *gmap)
gmap_flush_tlb(gmap);
/* Free all segment & region tables. */
list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
gmap_radix_tree_free(&gmap->guest_to_host);
gmap_radix_tree_free(&gmap->host_to_guest);
@@ -306,7 +306,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
unsigned long *new;
/* since we dont free the gmap table until gmap_free we can unlock */
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
new = (unsigned long *) page_to_phys(page);
@@ -321,7 +321,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
}
spin_unlock(&gmap->guest_table_lock);
if (page)
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
return 0;
}
@@ -546,30 +546,30 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
/* Create higher level tables in the gmap page table */
table = gmap->table;
if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
- table += (gaddr >> 53) & 0x7ff;
+ table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
- gaddr & 0xffe0000000000000UL))
+ gaddr & _REGION1_MASK))
return -ENOMEM;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
}
if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
- table += (gaddr >> 42) & 0x7ff;
+ table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
- gaddr & 0xfffffc0000000000UL))
+ gaddr & _REGION2_MASK))
return -ENOMEM;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
}
if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
- table += (gaddr >> 31) & 0x7ff;
+ table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
- gaddr & 0xffffffff80000000UL))
+ gaddr & _REGION3_MASK))
return -ENOMEM;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
}
- table += (gaddr >> 20) & 0x7ff;
+ table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
/* Walk the parent mm page table */
mm = gmap->mm;
pgd = pgd_offset(mm, vmaddr);
@@ -771,7 +771,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
table = gmap->table;
switch (gmap->asce & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
- table += (gaddr >> 53) & 0x7ff;
+ table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
if (level == 4)
break;
if (*table & _REGION_ENTRY_INVALID)
@@ -779,7 +779,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* Fallthrough */
case _ASCE_TYPE_REGION2:
- table += (gaddr >> 42) & 0x7ff;
+ table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
if (level == 3)
break;
if (*table & _REGION_ENTRY_INVALID)
@@ -787,7 +787,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* Fallthrough */
case _ASCE_TYPE_REGION3:
- table += (gaddr >> 31) & 0x7ff;
+ table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
if (level == 2)
break;
if (*table & _REGION_ENTRY_INVALID)
@@ -795,13 +795,13 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* Fallthrough */
case _ASCE_TYPE_SEGMENT:
- table += (gaddr >> 20) & 0x7ff;
+ table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
if (level == 1)
break;
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
- table += (gaddr >> 12) & 0xff;
+ table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
}
return table;
}
@@ -1126,7 +1126,7 @@ static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
if (!table || *table & _PAGE_INVALID)
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 12) - 1);
+ gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
}
@@ -1144,7 +1144,7 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
int i;
BUG_ON(!gmap_is_shadow(sg));
- for (i = 0; i < 256; i++, raddr += 1UL << 12)
+ for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
pgt[i] = _PAGE_INVALID;
}
@@ -1164,8 +1164,8 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1);
- sto = (unsigned long) (ste - ((raddr >> 20) & 0x7ff));
+ gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
+ sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN);
*ste = _SEGMENT_ENTRY_EMPTY;
@@ -1193,7 +1193,7 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT;
- for (i = 0; i < 2048; i++, raddr += 1UL << 20) {
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
continue;
pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
@@ -1222,8 +1222,8 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 31) - 1);
- r3o = (unsigned long) (r3e - ((raddr >> 31) & 0x7ff));
+ gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
+ r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr);
sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN);
*r3e = _REGION3_ENTRY_EMPTY;
@@ -1231,7 +1231,7 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
/* Free segment table */
page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
/**
@@ -1251,7 +1251,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) r3t | _ASCE_TYPE_REGION3;
- for (i = 0; i < 2048; i++, raddr += 1UL << 31) {
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
continue;
sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN);
@@ -1260,7 +1260,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
/* Free segment table */
page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1280,8 +1280,8 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 42) - 1);
- r2o = (unsigned long) (r2e - ((raddr >> 42) & 0x7ff));
+ gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
+ r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr);
r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN);
*r2e = _REGION2_ENTRY_EMPTY;
@@ -1289,7 +1289,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
/* Free region 3 table */
page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
/**
@@ -1309,7 +1309,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) r2t | _ASCE_TYPE_REGION2;
- for (i = 0; i < 2048; i++, raddr += 1UL << 42) {
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
continue;
r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN);
@@ -1318,7 +1318,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
/* Free region 3 table */
page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1338,8 +1338,8 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 53) - 1);
- r1o = (unsigned long) (r1e - ((raddr >> 53) & 0x7ff));
+ gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
+ r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr);
r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN);
*r1e = _REGION1_ENTRY_EMPTY;
@@ -1347,7 +1347,7 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
/* Free region 2 table */
page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
/**
@@ -1367,7 +1367,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) r1t | _ASCE_TYPE_REGION1;
- for (i = 0; i < 2048; i++, raddr += 1UL << 53) {
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
continue;
r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN);
@@ -1378,7 +1378,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
/* Free region 2 table */
page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1535,7 +1535,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
/* protect after insertion, so it will get properly invalidated */
down_read(&parent->mm->mmap_sem);
rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
- ((asce & _ASCE_TABLE_LENGTH) + 1) * 4096,
+ ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
PROT_READ, PGSTE_VSIE_BIT);
up_read(&parent->mm->mmap_sem);
spin_lock(&parent->shadow_lock);
@@ -1578,7 +1578,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
BUG_ON(!gmap_is_shadow(sg));
/* Allocate a shadow region second table */
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
page->index = r2t & _REGION_ENTRY_ORIGIN;
@@ -1614,10 +1614,10 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
}
spin_unlock(&sg->guest_table_lock);
/* Make r2t read-only in parent gmap page table */
- raddr = (saddr & 0xffe0000000000000UL) | _SHADOW_RMAP_REGION1;
+ raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
origin = r2t & _REGION_ENTRY_ORIGIN;
- offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * 4096;
- len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+ offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+ len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
spin_lock(&sg->guest_table_lock);
if (!rc) {
@@ -1634,7 +1634,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
return rc;
out_free:
spin_unlock(&sg->guest_table_lock);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r2t);
@@ -1662,7 +1662,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
BUG_ON(!gmap_is_shadow(sg));
/* Allocate a shadow region second table */
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
page->index = r3t & _REGION_ENTRY_ORIGIN;
@@ -1697,10 +1697,10 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
}
spin_unlock(&sg->guest_table_lock);
/* Make r3t read-only in parent gmap page table */
- raddr = (saddr & 0xfffffc0000000000UL) | _SHADOW_RMAP_REGION2;
+ raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
origin = r3t & _REGION_ENTRY_ORIGIN;
- offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * 4096;
- len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+ offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+ len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
spin_lock(&sg->guest_table_lock);
if (!rc) {
@@ -1717,7 +1717,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
return rc;
out_free:
spin_unlock(&sg->guest_table_lock);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r3t);
@@ -1745,7 +1745,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
/* Allocate a shadow segment table */
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
page->index = sgt & _REGION_ENTRY_ORIGIN;
@@ -1781,10 +1781,10 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
}
spin_unlock(&sg->guest_table_lock);
/* Make sgt read-only in parent gmap page table */
- raddr = (saddr & 0xffffffff80000000UL) | _SHADOW_RMAP_REGION3;
+ raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
origin = sgt & _REGION_ENTRY_ORIGIN;
- offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * 4096;
- len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+ offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+ len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
spin_lock(&sg->guest_table_lock);
if (!rc) {
@@ -1801,7 +1801,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
return rc;
out_free:
spin_unlock(&sg->guest_table_lock);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
@@ -1902,7 +1902,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
}
spin_unlock(&sg->guest_table_lock);
/* Make pgt read-only in parent gmap page table (not the pgste) */
- raddr = (saddr & 0xfffffffffff00000UL) | _SHADOW_RMAP_SEGMENT;
+ raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ);
spin_lock(&sg->guest_table_lock);
@@ -2021,7 +2021,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
}
/* Check for top level table */
start = sg->orig_asce & _ASCE_ORIGIN;
- end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096;
+ end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
gaddr < end) {
/* The complete shadow table has to go */
@@ -2032,7 +2032,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
return;
}
/* Remove the page table tree from on specific entry */
- head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> 12);
+ head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
gmap_for_each_rmap_safe(rmap, rnext, head) {
bits = rmap->raddr & _SHADOW_RMAP_MASK;
raddr = rmap->raddr ^ bits;
@@ -2076,7 +2076,7 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
struct gmap *gmap, *sg, *next;
offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
- offset = offset * (4096 / sizeof(pte_t));
+ offset = offset * (PAGE_SIZE / sizeof(pte_t));
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
@@ -2121,6 +2121,37 @@ static inline void thp_split_mm(struct mm_struct *mm)
}
/*
+ * Remove all empty zero pages from the mapping for lazy refaulting
+ * - This must be called after mm->context.has_pgste is set, to avoid
+ * future creation of zero pages
+ * - This must be called after THP was enabled
+ */
+static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long addr;
+
+ for (addr = start; addr != end; addr += PAGE_SIZE) {
+ pte_t *ptep;
+ spinlock_t *ptl;
+
+ ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (is_zero_pfn(pte_pfn(*ptep)))
+ ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_unmap_unlock(ptep, ptl);
+ }
+ return 0;
+}
+
+static inline void zap_zero_pages(struct mm_struct *mm)
+{
+ struct mm_walk walk = { .pmd_entry = __zap_zero_pages };
+
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+}
+
+/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
@@ -2137,6 +2168,7 @@ int s390_enable_sie(void)
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
+ zap_zero_pages(mm);
up_write(&mm->mmap_sem);
return 0;
}
@@ -2149,13 +2181,6 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
static int __s390_enable_skey(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
- /*
- * Remove all zero page mappings,
- * after establishing a policy to forbid zero page mappings
- * following faults for that page will get fresh anonymous pages
- */
- if (is_zero_pfn(pte_pfn(*pte)))
- ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
/* Clear storage key */
ptep_zap_key(walk->mm, addr, pte);
return 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8111694ce55a..3b567838b905 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -26,6 +26,7 @@
#include <linux/poison.h>
#include <linux/initrd.h>
#include <linux/export.h>
+#include <linux/cma.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <asm/processor.h>
@@ -84,7 +85,7 @@ void __init paging_init(void)
psw_t psw;
init_mm.pgd = swapper_pg_dir;
- if (VMALLOC_END > (1UL << 42)) {
+ if (VMALLOC_END > _REGION2_SIZE) {
asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
pgd_type = _REGION2_ENTRY_EMPTY;
} else {
@@ -93,8 +94,7 @@ void __init paging_init(void)
}
init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
S390_lowcore.kernel_asce = init_mm.context.asce;
- clear_table((unsigned long *) init_mm.pgd, pgd_type,
- sizeof(unsigned long)*2048);
+ crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
vmem_map_init();
/* enable virtual mapping in kernel mode */
@@ -137,6 +137,8 @@ void __init mem_init(void)
free_all_bootmem();
setup_zero_pages(); /* Setup zeroed pages. */
+ cmma_init_nodat();
+
mem_init_print_info(NULL);
}
@@ -166,6 +168,58 @@ unsigned long memory_block_size_bytes(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
+
+#ifdef CONFIG_CMA
+
+/* Prevent memory blocks which contain cma regions from going offline */
+
+struct s390_cma_mem_data {
+ unsigned long start;
+ unsigned long end;
+};
+
+static int s390_cma_check_range(struct cma *cma, void *data)
+{
+ struct s390_cma_mem_data *mem_data;
+ unsigned long start, end;
+
+ mem_data = data;
+ start = cma_get_base(cma);
+ end = start + cma_get_size(cma);
+ if (end < mem_data->start)
+ return 0;
+ if (start >= mem_data->end)
+ return 0;
+ return -EBUSY;
+}
+
+static int s390_cma_mem_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct s390_cma_mem_data mem_data;
+ struct memory_notify *arg;
+ int rc = 0;
+
+ arg = data;
+ mem_data.start = arg->start_pfn << PAGE_SHIFT;
+ mem_data.end = mem_data.start + (arg->nr_pages << PAGE_SHIFT);
+ if (action == MEM_GOING_OFFLINE)
+ rc = cma_for_each_area(s390_cma_check_range, &mem_data);
+ return notifier_from_errno(rc);
+}
+
+static struct notifier_block s390_cma_mem_nb = {
+ .notifier_call = s390_cma_mem_notifier,
+};
+
+static int __init s390_cma_mem_init(void)
+{
+ return register_memory_notifier(&s390_cma_mem_nb);
+}
+device_initcall(s390_cma_mem_init);
+
+#endif /* CONFIG_CMA */
+
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
{
unsigned long start_pfn = PFN_DOWN(start);
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 69a7b01ae746..07fa7b8ae233 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -10,9 +10,10 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/mm.h>
+#include <linux/memblock.h>
#include <linux/gfp.h>
#include <linux/init.h>
-
+#include <asm/facility.h>
#include <asm/page-states.h>
static int cmma_flag = 1;
@@ -36,14 +37,16 @@ __setup("cmma=", cmma);
static inline int cmma_test_essa(void)
{
register unsigned long tmp asm("0") = 0;
- register int rc asm("1") = -EOPNOTSUPP;
+ register int rc asm("1");
+ /* test ESSA_GET_STATE */
asm volatile(
- " .insn rrf,0xb9ab0000,%1,%1,0,0\n"
+ " .insn rrf,0xb9ab0000,%1,%1,%2,0\n"
"0: la %0,0\n"
"1:\n"
EX_TABLE(0b,1b)
- : "+&d" (rc), "+&d" (tmp));
+ : "=&d" (rc), "+&d" (tmp)
+ : "i" (ESSA_GET_STATE), "0" (-EOPNOTSUPP));
return rc;
}
@@ -51,11 +54,26 @@ void __init cmma_init(void)
{
if (!cmma_flag)
return;
- if (cmma_test_essa())
+ if (cmma_test_essa()) {
cmma_flag = 0;
+ return;
+ }
+ if (test_facility(147))
+ cmma_flag = 2;
}
-static inline void set_page_unstable(struct page *page, int order)
+static inline unsigned char get_page_state(struct page *page)
+{
+ unsigned char state;
+
+ asm volatile(" .insn rrf,0xb9ab0000,%0,%1,%2,0"
+ : "=&d" (state)
+ : "a" (page_to_phys(page)),
+ "i" (ESSA_GET_STATE));
+ return state & 0x3f;
+}
+
+static inline void set_page_unused(struct page *page, int order)
{
int i, rc;
@@ -66,14 +84,18 @@ static inline void set_page_unstable(struct page *page, int order)
"i" (ESSA_SET_UNUSED));
}
-void arch_free_page(struct page *page, int order)
+static inline void set_page_stable_dat(struct page *page, int order)
{
- if (!cmma_flag)
- return;
- set_page_unstable(page, order);
+ int i, rc;
+
+ for (i = 0; i < (1 << order); i++)
+ asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
+ : "=&d" (rc)
+ : "a" (page_to_phys(page + i)),
+ "i" (ESSA_SET_STABLE));
}
-static inline void set_page_stable(struct page *page, int order)
+static inline void set_page_stable_nodat(struct page *page, int order)
{
int i, rc;
@@ -81,14 +103,154 @@ static inline void set_page_stable(struct page *page, int order)
asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
: "=&d" (rc)
: "a" (page_to_phys(page + i)),
- "i" (ESSA_SET_STABLE));
+ "i" (ESSA_SET_STABLE_NODAT));
+}
+
+static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ struct page *page;
+ pmd_t *pmd;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(*pmd) || pmd_large(*pmd))
+ continue;
+ page = virt_to_page(pmd_val(*pmd));
+ set_bit(PG_arch_1, &page->flags);
+ } while (pmd++, addr = next, addr != end);
+}
+
+static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ struct page *page;
+ pud_t *pud;
+ int i;
+
+ pud = pud_offset(p4d, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none(*pud) || pud_large(*pud))
+ continue;
+ if (!pud_folded(*pud)) {
+ page = virt_to_page(pud_val(*pud));
+ for (i = 0; i < 3; i++)
+ set_bit(PG_arch_1, &page[i].flags);
+ }
+ mark_kernel_pmd(pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+}
+
+static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ struct page *page;
+ p4d_t *p4d;
+ int i;
+
+ p4d = p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(*p4d))
+ continue;
+ if (!p4d_folded(*p4d)) {
+ page = virt_to_page(p4d_val(*p4d));
+ for (i = 0; i < 3; i++)
+ set_bit(PG_arch_1, &page[i].flags);
+ }
+ mark_kernel_pud(p4d, addr, next);
+ } while (p4d++, addr = next, addr != end);
+}
+
+static void mark_kernel_pgd(void)
+{
+ unsigned long addr, next;
+ struct page *page;
+ pgd_t *pgd;
+ int i;
+
+ addr = 0;
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, MODULES_END);
+ if (pgd_none(*pgd))
+ continue;
+ if (!pgd_folded(*pgd)) {
+ page = virt_to_page(pgd_val(*pgd));
+ for (i = 0; i < 3; i++)
+ set_bit(PG_arch_1, &page[i].flags);
+ }
+ mark_kernel_p4d(pgd, addr, next);
+ } while (pgd++, addr = next, addr != MODULES_END);
+}
+
+void __init cmma_init_nodat(void)
+{
+ struct memblock_region *reg;
+ struct page *page;
+ unsigned long start, end, ix;
+
+ if (cmma_flag < 2)
+ return;
+ /* Mark pages used in kernel page tables */
+ mark_kernel_pgd();
+
+ /* Set all kernel pages not used for page tables to stable/no-dat */
+ for_each_memblock(memory, reg) {
+ start = memblock_region_memory_base_pfn(reg);
+ end = memblock_region_memory_end_pfn(reg);
+ page = pfn_to_page(start);
+ for (ix = start; ix < end; ix++, page++) {
+ if (__test_and_clear_bit(PG_arch_1, &page->flags))
+ continue; /* skip page table pages */
+ if (!list_empty(&page->lru))
+ continue; /* skip free pages */
+ set_page_stable_nodat(page, 0);
+ }
+ }
+}
+
+void arch_free_page(struct page *page, int order)
+{
+ if (!cmma_flag)
+ return;
+ set_page_unused(page, order);
}
void arch_alloc_page(struct page *page, int order)
{
if (!cmma_flag)
return;
- set_page_stable(page, order);
+ if (cmma_flag < 2)
+ set_page_stable_dat(page, order);
+ else
+ set_page_stable_nodat(page, order);
+}
+
+void arch_set_page_dat(struct page *page, int order)
+{
+ if (!cmma_flag)
+ return;
+ set_page_stable_dat(page, order);
+}
+
+void arch_set_page_nodat(struct page *page, int order)
+{
+ if (cmma_flag < 2)
+ return;
+ set_page_stable_nodat(page, order);
+}
+
+int arch_test_page_nodat(struct page *page)
+{
+ unsigned char state;
+
+ if (cmma_flag < 2)
+ return 0;
+ state = get_page_state(page);
+ return !!(state & 0x20);
}
void arch_set_page_states(int make_stable)
@@ -108,9 +270,9 @@ void arch_set_page_states(int make_stable)
list_for_each(l, &zone->free_area[order].free_list[t]) {
page = list_entry(l, struct page, lru);
if (make_stable)
- set_page_stable(page, order);
+ set_page_stable_dat(page, 0);
else
- set_page_unstable(page, order);
+ set_page_unused(page, order);
}
}
spin_unlock_irqrestore(&zone->lock, flags);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 180481589246..552f898dfa74 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -7,6 +7,7 @@
#include <asm/cacheflush.h>
#include <asm/facility.h>
#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/set_memory.h>
@@ -191,7 +192,7 @@ static int split_pud_page(pud_t *pudp, unsigned long addr)
pud_t new;
int i, ro, nx;
- pm_dir = vmem_pmd_alloc();
+ pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!pm_dir)
return -ENOMEM;
pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
@@ -328,7 +329,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr)
return;
}
for (i = 0; i < nr; i++) {
- __ptep_ipte(address, pte, IPTE_GLOBAL);
+ __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
address += PAGE_SIZE;
pte++;
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 18918e394ce4..c5b74dd61197 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -57,6 +57,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
if (!page)
return NULL;
+ arch_set_page_dat(page, 2);
return (unsigned long *) page_to_phys(page);
}
@@ -82,7 +83,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
int rc, notify;
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
- BUG_ON(mm->context.asce_limit < (1UL << 42));
+ BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
if (end >= TASK_SIZE_MAX)
return -ENOMEM;
rc = 0;
@@ -95,11 +96,11 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
}
spin_lock_bh(&mm->page_table_lock);
pgd = (unsigned long *) mm->pgd;
- if (mm->context.asce_limit == (1UL << 42)) {
+ if (mm->context.asce_limit == _REGION2_SIZE) {
crst_table_init(table, _REGION2_ENTRY_EMPTY);
p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
mm->pgd = (pgd_t *) table;
- mm->context.asce_limit = 1UL << 53;
+ mm->context.asce_limit = _REGION1_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
} else {
@@ -123,7 +124,7 @@ void crst_table_downgrade(struct mm_struct *mm)
pgd_t *pgd;
/* downgrade should only happen from 3 to 2 levels (compat only) */
- BUG_ON(mm->context.asce_limit != (1UL << 42));
+ BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
if (current->active_mm == mm) {
clear_user_asce();
@@ -132,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm)
pgd = mm->pgd;
mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
- mm->context.asce_limit = 1UL << 31;
+ mm->context.asce_limit = _REGION3_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
crst_table_free(mm, (unsigned long *) pgd);
@@ -214,6 +215,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
__free_page(page);
return NULL;
}
+ arch_set_page_dat(page, 0);
/* Initialize page table */
table = (unsigned long *) page_to_phys(page);
if (mm_alloc_pgste(mm)) {
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4a1f7366b17a..4198a71b8fdd 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -25,8 +25,49 @@
#include <asm/mmu_context.h>
#include <asm/page-states.h>
+static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int nodat)
+{
+ unsigned long opt, asce;
+
+ if (MACHINE_HAS_TLB_GUEST) {
+ opt = 0;
+ asce = READ_ONCE(mm->context.gmap_asce);
+ if (asce == 0UL || nodat)
+ opt |= IPTE_NODAT;
+ if (asce != -1UL) {
+ asce = asce ? : mm->context.asce;
+ opt |= IPTE_GUEST_ASCE;
+ }
+ __ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
+ } else {
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
+ }
+}
+
+static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int nodat)
+{
+ unsigned long opt, asce;
+
+ if (MACHINE_HAS_TLB_GUEST) {
+ opt = 0;
+ asce = READ_ONCE(mm->context.gmap_asce);
+ if (asce == 0UL || nodat)
+ opt |= IPTE_NODAT;
+ if (asce != -1UL) {
+ asce = asce ? : mm->context.asce;
+ opt |= IPTE_GUEST_ASCE;
+ }
+ __ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
+ } else {
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+ }
+}
+
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep,
+ int nodat)
{
pte_t old;
@@ -36,15 +77,16 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm,
atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __ptep_ipte(addr, ptep, IPTE_LOCAL);
+ ptep_ipte_local(mm, addr, ptep, nodat);
else
- __ptep_ipte(addr, ptep, IPTE_GLOBAL);
+ ptep_ipte_global(mm, addr, ptep, nodat);
atomic_dec(&mm->context.flush_count);
return old;
}
static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep,
+ int nodat)
{
pte_t old;
@@ -57,7 +99,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
pte_val(*ptep) |= _PAGE_INVALID;
mm->context.flush_mm = 1;
} else
- __ptep_ipte(addr, ptep, IPTE_GLOBAL);
+ ptep_ipte_global(mm, addr, ptep, nodat);
atomic_dec(&mm->context.flush_count);
return old;
}
@@ -229,10 +271,12 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
{
pgste_t pgste;
pte_t old;
+ int nodat;
preempt_disable();
pgste = ptep_xchg_start(mm, addr, ptep);
- old = ptep_flush_direct(mm, addr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ old = ptep_flush_direct(mm, addr, ptep, nodat);
old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
preempt_enable();
return old;
@@ -244,10 +288,12 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
{
pgste_t pgste;
pte_t old;
+ int nodat;
preempt_disable();
pgste = ptep_xchg_start(mm, addr, ptep);
- old = ptep_flush_lazy(mm, addr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ old = ptep_flush_lazy(mm, addr, ptep, nodat);
old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
preempt_enable();
return old;
@@ -259,10 +305,12 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
{
pgste_t pgste;
pte_t old;
+ int nodat;
preempt_disable();
pgste = ptep_xchg_start(mm, addr, ptep);
- old = ptep_flush_lazy(mm, addr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ old = ptep_flush_lazy(mm, addr, ptep, nodat);
if (mm_has_pgste(mm)) {
pgste = pgste_update_all(old, pgste, mm);
pgste_set(ptep, pgste);
@@ -290,6 +338,28 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(ptep_modify_prot_commit);
+static inline void pmdp_idte_local(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_LOCAL);
+ else
+ __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
+}
+
+static inline void pmdp_idte_global(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
+ else
+ __pmdp_csp(pmdp);
+}
+
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
@@ -298,16 +368,12 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
old = *pmdp;
if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
return old;
- if (!MACHINE_HAS_IDTE) {
- __pmdp_csp(pmdp);
- return old;
- }
atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __pmdp_idte(addr, pmdp, IDTE_LOCAL);
+ pmdp_idte_local(mm, addr, pmdp);
else
- __pmdp_idte(addr, pmdp, IDTE_GLOBAL);
+ pmdp_idte_global(mm, addr, pmdp);
atomic_dec(&mm->context.flush_count);
return old;
}
@@ -325,10 +391,9 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
cpumask_of(smp_processor_id()))) {
pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
mm->context.flush_mm = 1;
- } else if (MACHINE_HAS_IDTE)
- __pmdp_idte(addr, pmdp, IDTE_GLOBAL);
- else
- __pmdp_csp(pmdp);
+ } else {
+ pmdp_idte_global(mm, addr, pmdp);
+ }
atomic_dec(&mm->context.flush_count);
return old;
}
@@ -359,28 +424,46 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(pmdp_xchg_lazy);
-static inline pud_t pudp_flush_direct(struct mm_struct *mm,
- unsigned long addr, pud_t *pudp)
+static inline void pudp_idte_local(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
{
- pud_t old;
+ if (MACHINE_HAS_TLB_GUEST)
+ __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_LOCAL);
+ else
+ __pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
+}
- old = *pudp;
- if (pud_val(old) & _REGION_ENTRY_INVALID)
- return old;
- if (!MACHINE_HAS_IDTE) {
+static inline void pudp_idte_global(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ if (MACHINE_HAS_TLB_GUEST)
+ __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
+ else
/*
* Invalid bit position is the same for pmd and pud, so we can
* re-use _pmd_csp() here
*/
__pmdp_csp((pmd_t *) pudp);
+}
+
+static inline pud_t pudp_flush_direct(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ pud_t old;
+
+ old = *pudp;
+ if (pud_val(old) & _REGION_ENTRY_INVALID)
return old;
- }
atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __pudp_idte(addr, pudp, IDTE_LOCAL);
+ pudp_idte_local(mm, addr, pudp);
else
- __pudp_idte(addr, pudp, IDTE_GLOBAL);
+ pudp_idte_global(mm, addr, pudp);
atomic_dec(&mm->context.flush_count);
return old;
}
@@ -482,7 +565,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
{
pte_t entry;
pgste_t pgste;
- int pte_i, pte_p;
+ int pte_i, pte_p, nodat;
pgste = pgste_get_lock(ptep);
entry = *ptep;
@@ -495,13 +578,14 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
return -EAGAIN;
}
/* Change access rights and set pgste bit */
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
if (prot == PROT_NONE && !pte_i) {
- ptep_flush_direct(mm, addr, ptep);
+ ptep_flush_direct(mm, addr, ptep, nodat);
pgste = pgste_update_all(entry, pgste, mm);
pte_val(entry) |= _PAGE_INVALID;
}
if (prot == PROT_READ && !pte_p) {
- ptep_flush_direct(mm, addr, ptep);
+ ptep_flush_direct(mm, addr, ptep, nodat);
pte_val(entry) &= ~_PAGE_INVALID;
pte_val(entry) |= _PAGE_PROTECT;
}
@@ -541,10 +625,12 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
pgste_t pgste;
+ int nodat;
pgste = pgste_get_lock(ptep);
/* notifier is called by the caller */
- ptep_flush_direct(mm, saddr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ ptep_flush_direct(mm, saddr, ptep, nodat);
/* don't touch the storage key - it belongs to parent pgste */
pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
pgste_set_unlock(ptep, pgste);
@@ -617,6 +703,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
pte_t *ptep;
pte_t pte;
bool dirty;
+ int nodat;
pgd = pgd_offset(mm, addr);
p4d = p4d_alloc(mm, pgd, addr);
@@ -645,7 +732,8 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
pte = *ptep;
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
pgste = pgste_pte_notify(mm, addr, ptep, pgste);
- __ptep_ipte(addr, ptep, IPTE_GLOBAL);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ ptep_ipte_global(mm, addr, ptep, nodat);
if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
pte_val(pte) |= _PAGE_PROTECT;
else
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index d8398962a723..c0af0d7b6e5f 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -38,37 +38,14 @@ static void __ref *vmem_alloc_pages(unsigned int order)
return (void *) memblock_alloc(size, size);
}
-static inline p4d_t *vmem_p4d_alloc(void)
+void *vmem_crst_alloc(unsigned long val)
{
- p4d_t *p4d = NULL;
+ unsigned long *table;
- p4d = vmem_alloc_pages(2);
- if (!p4d)
- return NULL;
- clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4);
- return p4d;
-}
-
-static inline pud_t *vmem_pud_alloc(void)
-{
- pud_t *pud = NULL;
-
- pud = vmem_alloc_pages(2);
- if (!pud)
- return NULL;
- clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
- return pud;
-}
-
-pmd_t *vmem_pmd_alloc(void)
-{
- pmd_t *pmd = NULL;
-
- pmd = vmem_alloc_pages(2);
- if (!pmd)
- return NULL;
- clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
- return pmd;
+ table = vmem_alloc_pages(CRST_ALLOC_ORDER);
+ if (table)
+ crst_table_init(table, val);
+ return table;
}
pte_t __ref *vmem_pte_alloc(void)
@@ -114,14 +91,14 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
- p4_dir = vmem_p4d_alloc();
+ p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
if (!p4_dir)
goto out;
pgd_populate(&init_mm, pg_dir, p4_dir);
}
p4_dir = p4d_offset(pg_dir, address);
if (p4d_none(*p4_dir)) {
- pu_dir = vmem_pud_alloc();
+ pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
if (!pu_dir)
goto out;
p4d_populate(&init_mm, p4_dir, pu_dir);
@@ -136,7 +113,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
continue;
}
if (pud_none(*pu_dir)) {
- pm_dir = vmem_pmd_alloc();
+ pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!pm_dir)
goto out;
pud_populate(&init_mm, pu_dir, pm_dir);
@@ -253,7 +230,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
for (address = start; address < end;) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
- p4_dir = vmem_p4d_alloc();
+ p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
if (!p4_dir)
goto out;
pgd_populate(&init_mm, pg_dir, p4_dir);
@@ -261,7 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
p4_dir = p4d_offset(pg_dir, address);
if (p4d_none(*p4_dir)) {
- pu_dir = vmem_pud_alloc();
+ pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
if (!pu_dir)
goto out;
p4d_populate(&init_mm, p4_dir, pu_dir);
@@ -269,7 +246,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pu_dir = pud_offset(p4_dir, address);
if (pud_none(*pu_dir)) {
- pm_dir = vmem_pmd_alloc();
+ pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!pm_dir)
goto out;
pud_populate(&init_mm, pu_dir, pm_dir);
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index bd534b4d40e3..0ae3936e266f 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -24,6 +24,14 @@
bool zpci_unique_uid;
+static void update_uid_checking(bool new)
+{
+ if (zpci_unique_uid != new)
+ zpci_dbg(1, "uid checking:%d\n", new);
+
+ zpci_unique_uid = new;
+}
+
static inline void zpci_err_clp(unsigned int rsp, int rc)
{
struct {
@@ -319,7 +327,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data,
goto out;
}
- zpci_unique_uid = rrb->response.uid_checking;
+ update_uid_checking(rrb->response.uid_checking);
WARN_ON_ONCE(rrb->response.entry_size !=
sizeof(struct clp_fh_list_entry));
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 025ea20fc4b4..29d72bf8ed2b 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -41,7 +41,7 @@ static struct facility_def facility_defs[] = {
27, /* mvcos */
32, /* compare and swap and store */
33, /* compare and swap and store 2 */
- 34, /* general extension facility */
+ 34, /* general instructions extension */
35, /* execute extensions */
#endif
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -54,6 +54,9 @@ static struct facility_def facility_defs[] = {
#ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
53, /* load-and-zero-rightmost-byte, etc. */
#endif
+#ifdef CONFIG_HAVE_MARCH_Z14_FEATURES
+ 58, /* miscellaneous-instruction-extension 2 */
+#endif
-1 /* END */
}
},