author     Heiko Carstens <hca@linux.ibm.com>  2020-11-16 08:06:40 +0100
committer  Heiko Carstens <hca@linux.ibm.com>  2020-11-23 12:01:12 +0100
commit     87d5986345219a7e4f204726d9085ea87f3e22d0
tree       7c3cb89736d699e56186dce5f3863c311afb6e57 /arch/s390/include
parent     Merge branch 'fixes' into features
s390/mm: remove set_fs / rework address space handling
Remove set_fs support from s390. Doing this allows the address space
handling to be reworked and simplified. As a result, address spaces are
now set up like this:
CPU running in | %cr1 ASCE | %cr7 ASCE | %cr13 ASCE
----------------------------|-----------|-----------|-----------
user space | user | user | kernel
kernel, normal execution | kernel | user | kernel
kernel, kvm guest execution | gmap | user | kernel
To achieve this, the getcpu vdso syscall is removed in order to avoid
the need for secondary address mode and a separate vdso address space
for user space. The getcpu vdso syscall will be implemented differently
in a subsequent patch.
The kernel now always accesses user space via the secondary address
space. This happens in several ways:
- with mvcos in home space mode, reading from / writing to the
  secondary address space directly
- with mvcs/mvcp in primary space mode, copying from primary space to
  secondary space or vice versa
- with e.g. cs in secondary space mode, accessing the secondary space
As before, translation modes are switched with sacf before and after
instructions which access user space.
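For illustration, here is a minimal sketch of that sacf bracketing,
modeled on the futex_atomic_cmpxchg_inatomic() hunk in the diff below;
the helper name user_cs() is invented for this example:

	/*
	 * Illustrative only: compare-and-swap on a user space word with
	 * the translation mode switched around it. "sacf 256" enters
	 * secondary-space mode (user ASCE via %cr7), "sacf 768" returns
	 * to home-space mode (kernel ASCE via %cr13). EX_TABLE comes
	 * from asm/extable.h and handles faults on the user access.
	 */
	static inline int user_cs(u32 __user *uaddr, u32 oldval, u32 newval)
	{
		int ret;

		asm volatile(
			"	sacf	256\n"		/* secondary-space mode */
			"0:	cs	%1,%4,0(%5)\n"	/* cs on the user word */
			"1:	la	%0,0\n"		/* success: ret = 0 */
			"2:	sacf	768\n"		/* back to home-space mode */
			EX_TABLE(0b,2b) EX_TABLE(1b,2b)
			: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
			: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
			: "cc", "memory");
		return ret;
	}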
Lazy handling of control register reloading is removed in the hope of
making everything simpler, at the cost of making kernel entry and exit
a bit slower. That is: on kernel entry the primary asce is always
switched to the kernel asce, and on kernel exit it is switched back to
the user asce.
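As a sketch of that rule (the helper names on_kernel_entry() and
on_kernel_exit() are invented here for illustration; the real switching
happens in the entry/exit assembly, not in C helpers like these):

	/*
	 * Illustrative only: %cr1 handling as described above, using
	 * the __ctl_load() helper that also appears in the diff below.
	 */
	static inline void on_kernel_entry(void)
	{
		/* primary ASCE (%cr1) always points to the kernel ASCE */
		__ctl_load(S390_lowcore.kernel_asce, 1, 1);
	}

	static inline void on_kernel_exit(void)
	{
		/* restore the user ASCE in %cr1 before returning to user space */
		__ctl_load(S390_lowcore.user_asce, 1, 1);
	}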
In kernel mode there is only one exception to the primary asce: when
kvm guests are executed, the primary asce contains the gmap asce (which
describes the guest address space). The primary asce is reset to the
kernel asce whenever kvm guest execution is interrupted, so that this
doesn't have to be taken into account for any user space accesses.
Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Diffstat (limited to 'arch/s390/include')
-rw-r--r--  arch/s390/include/asm/futex.h        6
-rw-r--r--  arch/s390/include/asm/lowcore.h      4
-rw-r--r--  arch/s390/include/asm/mmu_context.h  25
-rw-r--r--  arch/s390/include/asm/processor.h    7
-rw-r--r--  arch/s390/include/asm/ptrace.h       1
-rw-r--r--  arch/s390/include/asm/uaccess.h      22
-rw-r--r--  arch/s390/include/asm/vdso.h         25
7 files changed, 9 insertions(+), 81 deletions(-)
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 26f9144562c9..c22debfcebf1 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -26,9 +26,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
 		u32 __user *uaddr)
 {
 	int oldval = 0, newval, ret;
-	mm_segment_t old_fs;
 
-	old_fs = enable_sacf_uaccess();
 	switch (op) {
 	case FUTEX_OP_SET:
 		__futex_atomic_op("lr %2,%5\n",
@@ -53,7 +51,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
 	default:
 		ret = -ENOSYS;
 	}
-	disable_sacf_uaccess(old_fs);
 	if (!ret)
 		*oval = oldval;
 
@@ -64,10 +61,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
 static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 						u32 oldval, u32 newval)
 {
-	mm_segment_t old_fs;
 	int ret;
 
-	old_fs = enable_sacf_uaccess();
 	asm volatile(
 		"   sacf 256\n"
 		"0: cs   %1,%4,0(%5)\n"
@@ -77,7 +72,6 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
 		: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
 		: "cc", "memory");
-	disable_sacf_uaccess(old_fs);
 	*uval = oldval;
 	return ret;
 }
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 612ed3c6d581..69ce9191eaf1 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -116,7 +116,7 @@ struct lowcore {
 	/* Address space pointer. */
 	__u64	kernel_asce;			/* 0x0380 */
 	__u64	user_asce;			/* 0x0388 */
-	__u64	vdso_asce;			/* 0x0390 */
+	__u8	pad_0x0390[0x0398-0x0390];	/* 0x0390 */
 
 	/*
 	 * The lpp and current_pid fields form a
@@ -134,7 +134,7 @@ struct lowcore {
 	__u32	spinlock_index;			/* 0x03b0 */
 	__u32	fpu_flags;			/* 0x03b4 */
 	__u64	percpu_offset;			/* 0x03b8 */
-	__u64	vdso_per_cpu_data;		/* 0x03c0 */
+	__u8	pad_0x03c0[0x03c8-0x03c0];	/* 0x03c0 */
 	__u64	machine_flags;			/* 0x03c8 */
 	__u64	gmap;				/* 0x03d0 */
 	__u8	pad_0x03d8[0x0400-0x03d8];	/* 0x03d8 */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index cec19ae164eb..51def960a3dd 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -71,16 +71,6 @@ static inline int init_new_context(struct task_struct *tsk,
 
 #define destroy_context(mm)		do { } while (0)
 
-static inline void set_user_asce(struct mm_struct *mm)
-{
-	S390_lowcore.user_asce = mm->context.asce;
-	__ctl_load(S390_lowcore.user_asce, 1, 1);
-	clear_cpu_flag(CIF_ASCE_PRIMARY);
-}
-
-mm_segment_t enable_sacf_uaccess(void);
-void disable_sacf_uaccess(mm_segment_t old_fs);
-
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
 {
@@ -88,15 +78,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	S390_lowcore.user_asce = next->context.asce;
 	cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
-	/* Clear previous user-ASCE from CR1 and CR7 */
-	if (!test_cpu_flag(CIF_ASCE_PRIMARY)) {
-		__ctl_load(S390_lowcore.kernel_asce, 1, 1);
-		set_cpu_flag(CIF_ASCE_PRIMARY);
-	}
-	if (test_cpu_flag(CIF_ASCE_SECONDARY)) {
-		__ctl_load(S390_lowcore.vdso_asce, 7, 7);
-		clear_cpu_flag(CIF_ASCE_SECONDARY);
-	}
+	/* Clear previous user-ASCE from CR7 */
+	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
 	if (prev != next)
 		cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
 }
@@ -115,7 +98,7 @@ static inline void finish_arch_post_lock_switch(void)
 			__tlb_flush_mm_lazy(mm);
 		preempt_enable();
 	}
-	set_fs(current->thread.mm_segment);
+	__ctl_load(S390_lowcore.user_asce, 7, 7);
 }
 
 #define enter_lazy_tlb(mm,tsk)	do { } while (0)
@@ -126,7 +109,7 @@ static inline void activate_mm(struct mm_struct *prev,
 {
 	switch_mm(prev, next, current);
 	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
-	set_user_asce(next);
+	__ctl_load(S390_lowcore.user_asce, 7, 7);
 }
 
 #endif /* __S390_MMU_CONTEXT_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 2043c562ec55..6b7269f51f83 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,8 +14,6 @@
 
 #include <linux/bits.h>
 
-#define CIF_ASCE_PRIMARY	0	/* primary asce needs fixup / uaccess */
-#define CIF_ASCE_SECONDARY	1	/* secondary asce needs fixup / uaccess */
 #define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
 #define CIF_FPU			3	/* restore FPU registers */
 #define CIF_IGNORE_IRQ		4	/* ignore interrupt (for udelay) */
@@ -23,8 +21,6 @@
 #define CIF_MCCK_GUEST		6	/* machine check happening in guest */
 #define CIF_DEDICATED_CPU	7	/* this CPU is dedicated */
 
-#define _CIF_ASCE_PRIMARY	BIT(CIF_ASCE_PRIMARY)
-#define _CIF_ASCE_SECONDARY	BIT(CIF_ASCE_SECONDARY)
 #define _CIF_NOHZ_DELAY		BIT(CIF_NOHZ_DELAY)
 #define _CIF_FPU		BIT(CIF_FPU)
 #define _CIF_IGNORE_IRQ		BIT(CIF_IGNORE_IRQ)
@@ -102,8 +98,6 @@ extern void __bpon(void);
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
-typedef unsigned int mm_segment_t;
-
 /*
  * Thread structure
  */
@@ -116,7 +110,6 @@ struct thread_struct {
 	unsigned long hardirq_timer;	/* task cputime in hardirq context */
 	unsigned long softirq_timer;	/* task cputime in softirq context */
 	unsigned long sys_call_table;	/* system call table address */
-	mm_segment_t mm_segment;
 	unsigned long gmap_addr;	/* address of last gmap fault. */
 	unsigned int gmap_write_flag;	/* gmap fault write indication */
 	unsigned int gmap_int_code;	/* int code of last gmap fault */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 16b3e4396312..73ca7f7cac33 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -87,6 +87,7 @@ struct pt_regs
 	unsigned int int_parm;
 	unsigned long int_parm_long;
 	unsigned long flags;
+	unsigned long cr1;
 };
 
 /*
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index c868e7ee49b3..e59fd96a1561 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -18,24 +18,6 @@
 #include <asm/extable.h>
 #include <asm/facility.h>
 
-/*
- * The fs value determines whether argument validity checking should be
- * performed or not. If get_fs() == USER_DS, checking is performed, with
- * get_fs() == KERNEL_DS, checking is bypassed.
- *
- * For historical reasons, these macros are grossly misnamed.
- */
-
-#define KERNEL_DS	(0)
-#define KERNEL_DS_SACF	(1)
-#define USER_DS		(2)
-#define USER_DS_SACF	(3)
-
-#define get_fs()	(current->thread.mm_segment)
-#define uaccess_kernel()	((get_fs() & 2) == KERNEL_DS)
-
-void set_fs(mm_segment_t fs);
-
 static inline int __range_ok(unsigned long addr, unsigned long size)
 {
 	return 1;
@@ -88,7 +70,7 @@ int __get_user_bad(void) __attribute__((noreturn));
 
 static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
 {
-	unsigned long spec = 0x010000UL;
+	unsigned long spec = 0x810000UL;
 	int rc;
 
 	switch (size) {
@@ -121,7 +103,7 @@
 
 static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
 {
-	unsigned long spec = 0x01UL;
+	unsigned long spec = 0x81UL;
 	int rc;
 
 	switch (size) {
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index 29b44a930e71..9b299c05abf1 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -12,32 +12,7 @@
 
 #ifndef __ASSEMBLY__
 
-/*
- * Note about the vdso_data and vdso_per_cpu_data structures:
- *
- * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
- * structure is supposed to be known only to the function in the vdso
- * itself and may change without notice.
- */
-
-struct vdso_per_cpu_data {
-	/*
-	 * Note: node_id and cpu_nr must be at adjacent memory locations.
-	 * VDSO userspace must read both values with a single instruction.
-	 */
-	union {
-		__u64 getcpu_val;
-		struct {
-			__u32 node_id;
-			__u32 cpu_nr;
-		};
-	};
-};
-
 extern struct vdso_data *vdso_data;
 
-int vdso_alloc_per_cpu(struct lowcore *lowcore);
-void vdso_free_per_cpu(struct lowcore *lowcore);
-
 #endif /* __ASSEMBLY__ */
 #endif /* __S390_VDSO_H__ */