summary refs log tree commit diff stats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig20
-rw-r--r--arch/s390/boot/compressed/Makefile9
-rw-r--r--arch/s390/boot/compressed/misc.c4
-rw-r--r--arch/s390/hypfs/hypfs_dbfs.c2
-rw-r--r--arch/s390/include/asm/airq.h67
-rw-r--r--arch/s390/include/asm/bitops.h14
-rw-r--r--arch/s390/include/asm/cio.h1
-rw-r--r--arch/s390/include/asm/cputime.h3
-rw-r--r--arch/s390/include/asm/hardirq.h5
-rw-r--r--arch/s390/include/asm/hugetlb.h135
-rw-r--r--arch/s390/include/asm/hw_irq.h17
-rw-r--r--arch/s390/include/asm/irq.h35
-rw-r--r--arch/s390/include/asm/mmu_context.h3
-rw-r--r--arch/s390/include/asm/page.h19
-rw-r--r--arch/s390/include/asm/pci.h54
-rw-r--r--arch/s390/include/asm/pci_insn.h12
-rw-r--r--arch/s390/include/asm/pci_io.h10
-rw-r--r--arch/s390/include/asm/pgtable.h637
-rw-r--r--arch/s390/include/asm/processor.h10
-rw-r--r--arch/s390/include/asm/serial.h6
-rw-r--r--arch/s390/include/asm/switch_to.h13
-rw-r--r--arch/s390/include/asm/tlb.h11
-rw-r--r--arch/s390/include/asm/tlbflush.h6
-rw-r--r--arch/s390/include/asm/vtime.h7
-rw-r--r--arch/s390/include/uapi/asm/ptrace.h1
-rw-r--r--arch/s390/kernel/cache.c15
-rw-r--r--arch/s390/kernel/crash_dump.c51
-rw-r--r--arch/s390/kernel/entry.S16
-rw-r--r--arch/s390/kernel/entry64.S11
-rw-r--r--arch/s390/kernel/irq.c160
-rw-r--r--arch/s390/kernel/kprobes.c21
-rw-r--r--arch/s390/kernel/nmi.c5
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c4
-rw-r--r--arch/s390/kernel/perf_event.c9
-rw-r--r--arch/s390/kernel/process.c1
-rw-r--r--arch/s390/kernel/processor.c2
-rw-r--r--arch/s390/kernel/ptrace.c52
-rw-r--r--arch/s390/kernel/setup.c1
-rw-r--r--arch/s390/kernel/smp.c17
-rw-r--r--arch/s390/kernel/suspend.c11
-rw-r--r--arch/s390/kernel/swsusp_asm64.S7
-rw-r--r--arch/s390/kernel/sysinfo.c2
-rw-r--r--arch/s390/kernel/time.c1
-rw-r--r--arch/s390/kernel/vdso.c6
-rw-r--r--arch/s390/kernel/vtime.c7
-rw-r--r--arch/s390/kvm/gaccess.h12
-rw-r--r--arch/s390/kvm/kvm-s390.c21
-rw-r--r--arch/s390/kvm/priv.c4
-rw-r--r--arch/s390/lib/delay.c2
-rw-r--r--arch/s390/lib/uaccess_pt.c16
-rw-r--r--arch/s390/mm/dump_pagetables.c18
-rw-r--r--arch/s390/mm/fault.c4
-rw-r--r--arch/s390/mm/gup.c6
-rw-r--r--arch/s390/mm/hugetlbpage.c124
-rw-r--r--arch/s390/mm/init.c1
-rw-r--r--arch/s390/mm/pageattr.c2
-rw-r--r--arch/s390/mm/pgtable.c87
-rw-r--r--arch/s390/mm/vmem.c15
-rw-r--r--arch/s390/net/bpf_jit_comp.c113
-rw-r--r--arch/s390/oprofile/init.c2
-rw-r--r--arch/s390/pci/Makefile2
-rw-r--r--arch/s390/pci/pci.c575
-rw-r--r--arch/s390/pci/pci_clp.c146
-rw-r--r--arch/s390/pci/pci_dma.c16
-rw-r--r--arch/s390/pci/pci_event.c2
-rw-r--r--arch/s390/pci/pci_insn.c18
-rw-r--r--arch/s390/pci/pci_msi.c142
-rw-r--r--arch/s390/pci/pci_sysfs.c27
68 files changed, 1476 insertions, 1379 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 22f75b504f7f..8b7892bf6d8b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -116,8 +116,10 @@ config S390
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ select HAVE_GENERIC_HARDIRQS
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZ4
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_LZO
select HAVE_KERNEL_XZ
@@ -227,11 +229,12 @@ config MARCH_Z196
not work on older machines.
config MARCH_ZEC12
- bool "IBM zEC12"
+ bool "IBM zBC12 and zEC12"
select HAVE_MARCH_ZEC12_FEATURES if 64BIT
help
- Select this to enable optimizations for IBM zEC12 (2827 series). The
- kernel will be slightly faster but will not work on older machines.
+ Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
+ 2827 series). The kernel will be slightly faster but will not work on
+ older machines.
endchoice
@@ -443,6 +446,16 @@ config PCI_NR_FUNCTIONS
This allows you to specify the maximum number of PCI functions which
this kernel will support.
+config PCI_NR_MSI
+ int "Maximum number of MSI interrupts (64-32768)"
+ range 64 32768
+ default "256"
+ help
+ This defines the number of virtual interrupts the kernel will
+ provide for MSI interrupts. If you configure your system to have
+ too few, drivers will fail to allocate MSI interrupts for all
+ PCI devices.
+
source "drivers/pci/Kconfig"
source "drivers/pci/pcie/Kconfig"
source "drivers/pci/hotplug/Kconfig"
@@ -709,6 +722,7 @@ config S390_GUEST
def_bool y
prompt "s390 support for virtio devices"
depends on 64BIT
+ select TTY
select VIRTUALIZATION
select VIRTIO
select VIRTIO_CONSOLE
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 3ad8f61c9985..866ecbe670e4 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -6,9 +6,9 @@
BITS := $(if $(CONFIG_64BIT),64,31)
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \
- vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo misc.o piggy.o \
- sizes.h head$(BITS).o
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += misc.o piggy.o sizes.h head$(BITS).o
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
@@ -48,6 +48,7 @@ vmlinux.bin.all-y := $(obj)/vmlinux.bin
suffix-$(CONFIG_KERNEL_GZIP) := gz
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_LZ4) := lz4
suffix-$(CONFIG_KERNEL_LZMA) := lzma
suffix-$(CONFIG_KERNEL_LZO) := lzo
suffix-$(CONFIG_KERNEL_XZ) := xz
@@ -56,6 +57,8 @@ $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
$(call if_changed,gzip)
$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
$(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+ $(call if_changed,lz4)
$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
$(call if_changed,lzma)
$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index c4c6a1cf221b..57cbaff1f397 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -47,6 +47,10 @@ static unsigned long free_mem_end_ptr;
#include "../../../../lib/decompress_bunzip2.c"
#endif
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
#ifdef CONFIG_KERNEL_LZMA
#include "../../../../lib/decompress_unlzma.c"
#endif
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index bb5dd496614f..17ab8b7b53cc 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -105,7 +105,7 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df)
int hypfs_dbfs_init(void)
{
dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
- return PTR_RET(dbfs_dir);
+ return PTR_ERR_OR_ZERO(dbfs_dir);
}
void hypfs_dbfs_exit(void)
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index 4066cee0c2d2..4bbb5957ed1b 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -9,6 +9,8 @@
#ifndef _ASM_S390_AIRQ_H
#define _ASM_S390_AIRQ_H
+#include <linux/bit_spinlock.h>
+
struct airq_struct {
struct hlist_node list; /* Handler queueing. */
void (*handler)(struct airq_struct *); /* Thin-interrupt handler */
@@ -23,4 +25,69 @@ struct airq_struct {
int register_adapter_interrupt(struct airq_struct *airq);
void unregister_adapter_interrupt(struct airq_struct *airq);
+/* Adapter interrupt bit vector */
+struct airq_iv {
+ unsigned long *vector; /* Adapter interrupt bit vector */
+ unsigned long *avail; /* Allocation bit mask for the bit vector */
+ unsigned long *bitlock; /* Lock bit mask for the bit vector */
+ unsigned long *ptr; /* Pointer associated with each bit */
+ unsigned int *data; /* 32 bit value associated with each bit */
+ unsigned long bits; /* Number of bits in the vector */
+ unsigned long end; /* Number of highest allocated bit + 1 */
+ spinlock_t lock; /* Lock to protect alloc & free */
+};
+
+#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */
+#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */
+#define AIRQ_IV_PTR 4 /* Allocate the ptr array */
+#define AIRQ_IV_DATA 8 /* Allocate the data array */
+
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
+void airq_iv_release(struct airq_iv *iv);
+unsigned long airq_iv_alloc_bit(struct airq_iv *iv);
+void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit);
+unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
+ unsigned long end);
+
+static inline unsigned long airq_iv_end(struct airq_iv *iv)
+{
+ return iv->end;
+}
+
+static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit)
+{
+ const unsigned long be_to_le = BITS_PER_LONG - 1;
+ bit_spin_lock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit)
+{
+ const unsigned long be_to_le = BITS_PER_LONG - 1;
+ bit_spin_unlock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit,
+ unsigned int data)
+{
+ iv->data[bit] = data;
+}
+
+static inline unsigned int airq_iv_get_data(struct airq_iv *iv,
+ unsigned long bit)
+{
+ return iv->data[bit];
+}
+
+static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit,
+ unsigned long ptr)
+{
+ iv->ptr[bit] = ptr;
+}
+
+static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv,
+ unsigned long bit)
+{
+ return iv->ptr[bit];
+}
+
#endif /* _ASM_S390_AIRQ_H */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 4d8604e311f3..10135a38673c 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -216,7 +216,7 @@ static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr)
addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
asm volatile(
" oc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" );
+ : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc");
}
static inline void
@@ -244,7 +244,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *ptr)
addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
asm volatile(
" nc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" );
+ : "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc");
}
static inline void
@@ -271,7 +271,7 @@ static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr)
addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
asm volatile(
" xc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" );
+ : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc");
}
static inline void
@@ -301,7 +301,7 @@ test_and_set_bit_simple(unsigned long nr, volatile unsigned long *ptr)
ch = *(unsigned char *) addr;
asm volatile(
" oc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
+ : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
: "cc", "memory");
return (ch >> (nr & 7)) & 1;
}
@@ -320,7 +320,7 @@ test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr)
ch = *(unsigned char *) addr;
asm volatile(
" nc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7])
+ : "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7])
: "cc", "memory");
return (ch >> (nr & 7)) & 1;
}
@@ -339,7 +339,7 @@ test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr)
ch = *(unsigned char *) addr;
asm volatile(
" xc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
+ : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
: "cc", "memory");
return (ch >> (nr & 7)) & 1;
}
@@ -693,7 +693,7 @@ static inline int find_next_bit_left(const unsigned long *addr,
size -= offset;
p = addr + offset / BITS_PER_LONG;
if (bit) {
- set = __flo_word(0, *p & (~0UL << bit));
+ set = __flo_word(0, *p & (~0UL >> bit));
if (set >= size)
return size + offset;
if (set < BITS_PER_LONG)
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index ffb898961c8d..d42625053c37 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -296,6 +296,7 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1,
return 0;
}
+void channel_subsystem_reinit(void);
extern void css_schedule_reprobe(void);
extern void reipl_ccw_dev(struct ccw_dev_id *id);
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index d2ff41370c0c..f65bd3634519 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -13,9 +13,6 @@
#include <asm/div64.h>
-#define __ARCH_HAS_VTIME_ACCOUNT
-#define __ARCH_HAS_VTIME_TASK_SWITCH
-
/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
typedef unsigned long long __nocast cputime_t;
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index 0c82ba86e997..a908d2941c5d 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -20,4 +20,9 @@
#define HARDIRQ_BITS 8
+static inline void ack_bad_irq(unsigned int irq)
+{
+ printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+
#endif /* __ASM_HARDIRQ_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index bd90359d6d22..11eae5f55b70 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -17,6 +17,9 @@
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
+pte_t huge_ptep_get(pte_t *ptep);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep);
/*
* If the arch doesn't supply something else, assume that hugepage
@@ -38,147 +41,75 @@ static inline int prepare_hugepage_range(struct file *file,
int arch_prepare_hugepage(struct page *page);
void arch_release_hugepage(struct page *page);
-static inline pte_t huge_pte_wrprotect(pte_t pte)
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- pte_val(pte) |= _PAGE_RO;
- return pte;
+ pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
}
-static inline int huge_pte_none(pte_t pte)
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
{
- return (pte_val(pte) & _SEGMENT_ENTRY_INV) &&
- !(pte_val(pte) & _SEGMENT_ENTRY_RO);
+ huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
}
-static inline pte_t huge_ptep_get(pte_t *ptep)
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
{
- pte_t pte = *ptep;
- unsigned long mask;
-
- if (!MACHINE_HAS_HPAGE) {
- ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN);
- if (ptep) {
- mask = pte_val(pte) &
- (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
- pte = pte_mkhuge(*ptep);
- pte_val(pte) |= mask;
- }
+ int changed = !pte_same(huge_ptep_get(ptep), pte);
+ if (changed) {
+ huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
- return pte;
+ return changed;
}
-static inline void __pmd_csp(pmd_t *pmdp)
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
- register unsigned long reg2 asm("2") = pmd_val(*pmdp);
- register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
- _SEGMENT_ENTRY_INV;
- register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
-
- asm volatile(
- " csp %1,%3"
- : "=m" (*pmdp)
- : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+ pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
+ set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
}
-static inline void huge_ptep_invalidate(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
-{
- pmd_t *pmdp = (pmd_t *) ptep;
-
- if (MACHINE_HAS_IDTE)
- __pmd_idte(address, pmdp);
- else
- __pmd_csp(pmdp);
- pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
-}
-
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- pte_t pte = huge_ptep_get(ptep);
-
- huge_ptep_invalidate(mm, addr, ptep);
- return pte;
-}
-
-#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
-({ \
- int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \
- if (__changed) { \
- huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \
- set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \
- } \
- __changed; \
-})
-
-#define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \
-({ \
- pte_t __pte = huge_ptep_get(__ptep); \
- if (huge_pte_write(__pte)) { \
- huge_ptep_invalidate(__mm, __addr, __ptep); \
- set_huge_pte_at(__mm, __addr, __ptep, \
- huge_pte_wrprotect(__pte)); \
- } \
-})
-
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)
+static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
{
- huge_ptep_invalidate(vma->vm_mm, address, ptep);
+ return mk_pte(page, pgprot);
}
-static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
+static inline int huge_pte_none(pte_t pte)
{
- pte_t pte;
- pmd_t pmd;
-
- pmd = mk_pmd_phys(page_to_phys(page), pgprot);
- pte_val(pte) = pmd_val(pmd);
- return pte;
+ return pte_none(pte);
}
static inline int huge_pte_write(pte_t pte)
{
- pmd_t pmd;
-
- pmd_val(pmd) = pte_val(pte);
- return pmd_write(pmd);
+ return pte_write(pte);
}
static inline int huge_pte_dirty(pte_t pte)
{
- /* No dirty bit in the segment table entry. */
- return 0;
+ return pte_dirty(pte);
}
static inline pte_t huge_pte_mkwrite(pte_t pte)
{
- pmd_t pmd;
-
- pmd_val(pmd) = pte_val(pte);
- pte_val(pte) = pmd_val(pmd_mkwrite(pmd));
- return pte;
+ return pte_mkwrite(pte);
}
static inline pte_t huge_pte_mkdirty(pte_t pte)
{
- /* No dirty bit in the segment table entry. */
- return pte;
+ return pte_mkdirty(pte);
}
-static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
+static inline pte_t huge_pte_wrprotect(pte_t pte)
{
- pmd_t pmd;
-
- pmd_val(pmd) = pte_val(pte);
- pte_val(pte) = pmd_val(pmd_modify(pmd, newprot));
- return pte;
+ return pte_wrprotect(pte);
}
-static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
+static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
{
- pmd_clear((pmd_t *) ptep);
+ return pte_modify(pte, newprot);
}
#endif /* _ASM_S390_HUGETLB_H */
diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
index 7e3d2586c1ff..ee96a8b697f9 100644
--- a/arch/s390/include/asm/hw_irq.h
+++ b/arch/s390/include/asm/hw_irq.h
@@ -4,19 +4,8 @@
#include <linux/msi.h>
#include <linux/pci.h>
-static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
-{
- return __irq_get_msi_desc(irq);
-}
-
-/* Must be called with msi map lock held */
-static inline int irq_set_msi_desc(unsigned int irq, struct msi_desc *msi)
-{
- if (!msi)
- return -EINVAL;
-
- msi->irq = irq;
- return 0;
-}
+void __init init_airq_interrupts(void);
+void __init init_cio_interrupts(void);
+void __init init_ext_interrupts(void);
#endif
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 87c17bfb2968..1eaa3625803c 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -1,17 +1,28 @@
#ifndef _ASM_IRQ_H
#define _ASM_IRQ_H
+#define EXT_INTERRUPT 1
+#define IO_INTERRUPT 2
+#define THIN_INTERRUPT 3
+
+#define NR_IRQS_BASE 4
+
+#ifdef CONFIG_PCI_NR_MSI
+# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI)
+#else
+# define NR_IRQS NR_IRQS_BASE
+#endif
+
+/* This number is used when no interrupt has been assigned */
+#define NO_IRQ 0
+
+#ifndef __ASSEMBLY__
+
#include <linux/hardirq.h>
#include <linux/percpu.h>
#include <linux/cache.h>
#include <linux/types.h>
-enum interruption_main_class {
- EXTERNAL_INTERRUPT,
- IO_INTERRUPT,
- NR_IRQS
-};
-
enum interruption_class {
IRQEXT_CLK,
IRQEXT_EXC,
@@ -72,14 +83,8 @@ void service_subclass_irq_unregister(void);
void measurement_alert_subclass_register(void);
void measurement_alert_subclass_unregister(void);
-#ifdef CONFIG_LOCKDEP
-# define disable_irq_nosync_lockdep(irq) disable_irq_nosync(irq)
-# define disable_irq_nosync_lockdep_irqsave(irq, flags) \
- disable_irq_nosync(irq)
-# define disable_irq_lockdep(irq) disable_irq(irq)
-# define enable_irq_lockdep(irq) enable_irq(irq)
-# define enable_irq_lockdep_irqrestore(irq, flags) \
- enable_irq(irq)
-#endif
+#define irq_canonicalize(irq) (irq)
+
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_IRQ_H */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 4fb67a0e4ddf..9f973d8de90e 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -60,8 +60,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
WARN_ON(atomic_read(&prev->context.attach_count) < 0);
atomic_inc(&next->context.attach_count);
/* Check for TLBs not flushed yet */
- if (next->context.flush_mm)
- __tlb_flush_mm(next);
+ __tlb_flush_mm_lazy(next);
}
#define enter_lazy_tlb(mm,tsk) do { } while (0)
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 5d64fb7619cc..1e51f2915b2e 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -32,16 +32,6 @@
void storage_key_init_range(unsigned long start, unsigned long end);
-static inline unsigned long pfmf(unsigned long function, unsigned long address)
-{
- asm volatile(
- " .insn rre,0xb9af0000,%[function],%[address]"
- : [address] "+a" (address)
- : [function] "d" (function)
- : "memory");
- return address;
-}
-
static inline void clear_page(void *page)
{
register unsigned long reg1 asm ("1") = 0;
@@ -150,15 +140,6 @@ static inline int page_reset_referenced(unsigned long addr)
#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */
#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */
-/*
- * Test and clear referenced bit in storage key.
- */
-#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
-static inline int page_test_and_clear_young(unsigned long pfn)
-{
- return page_reset_referenced(pfn << PAGE_SHIFT);
-}
-
struct page;
void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order);
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 6e577ba0e5da..c290f13d1c47 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -6,6 +6,7 @@
/* must be set before including pci_clp.h */
#define PCI_BAR_COUNT 6
+#include <linux/pci.h>
#include <asm-generic/pci.h>
#include <asm-generic/pci-dma-compat.h>
#include <asm/pci_clp.h>
@@ -53,14 +54,9 @@ struct zpci_fmb {
atomic64_t unmapped_pages;
} __packed __aligned(16);
-struct msi_map {
- unsigned long irq;
- struct msi_desc *msi;
- struct hlist_node msi_chain;
-};
-
-#define ZPCI_NR_MSI_VECS 64
-#define ZPCI_MSI_MASK (ZPCI_NR_MSI_VECS - 1)
+#define ZPCI_MSI_VEC_BITS 11
+#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS)
+#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1)
enum zpci_state {
ZPCI_FN_STATE_RESERVED,
@@ -91,8 +87,7 @@ struct zpci_dev {
/* IRQ stuff */
u64 msi_addr; /* MSI address */
- struct zdev_irq_map *irq_map;
- struct msi_map *msi_map[ZPCI_NR_MSI_VECS];
+ struct airq_iv *aibv; /* adapter interrupt bit vector */
unsigned int aisb; /* number of the summary bit */
/* DMA stuff */
@@ -122,11 +117,6 @@ struct zpci_dev {
struct dentry *debugfs_perf;
};
-struct pci_hp_callback_ops {
- int (*create_slot) (struct zpci_dev *zdev);
- void (*remove_slot) (struct zpci_dev *zdev);
-};
-
static inline bool zdev_enabled(struct zpci_dev *zdev)
{
return (zdev->fh & (1UL << 31)) ? true : false;
@@ -146,32 +136,38 @@ int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
int zpci_unregister_ioat(struct zpci_dev *, u8);
/* CLP */
-int clp_find_pci_devices(void);
+int clp_scan_pci_devices(void);
+int clp_rescan_pci_devices(void);
+int clp_rescan_pci_devices_simple(void);
int clp_add_pci_device(u32, u32, int);
int clp_enable_fh(struct zpci_dev *, u8);
int clp_disable_fh(struct zpci_dev *);
-/* MSI */
-struct msi_desc *__irq_get_msi_desc(unsigned int);
-int zpci_msi_set_mask_bits(struct msi_desc *, u32, u32);
-int zpci_setup_msi_irq(struct zpci_dev *, struct msi_desc *, unsigned int, int);
-void zpci_teardown_msi_irq(struct zpci_dev *, struct msi_desc *);
-int zpci_msihash_init(void);
-void zpci_msihash_exit(void);
-
#ifdef CONFIG_PCI
/* Error handling and recovery */
void zpci_event_error(void *);
void zpci_event_availability(void *);
+void zpci_rescan(void);
#else /* CONFIG_PCI */
static inline void zpci_event_error(void *e) {}
static inline void zpci_event_availability(void *e) {}
+static inline void zpci_rescan(void) {}
#endif /* CONFIG_PCI */
+#ifdef CONFIG_HOTPLUG_PCI_S390
+int zpci_init_slot(struct zpci_dev *);
+void zpci_exit_slot(struct zpci_dev *);
+#else /* CONFIG_HOTPLUG_PCI_S390 */
+static inline int zpci_init_slot(struct zpci_dev *zdev)
+{
+ return 0;
+}
+static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
+#endif /* CONFIG_HOTPLUG_PCI_S390 */
+
/* Helpers */
struct zpci_dev *get_zdev(struct pci_dev *);
struct zpci_dev *get_zdev_by_fid(u32);
-bool zpci_fid_present(u32);
/* sysfs */
int zpci_sysfs_add_device(struct device *);
@@ -181,14 +177,6 @@ void zpci_sysfs_remove_device(struct device *);
int zpci_dma_init(void);
void zpci_dma_exit(void);
-/* Hotplug */
-extern struct mutex zpci_list_lock;
-extern struct list_head zpci_list;
-extern unsigned int s390_pci_probe;
-
-void zpci_register_hp_ops(struct pci_hp_callback_ops *);
-void zpci_deregister_hp_ops(void);
-
/* FMB */
int zpci_fmb_enable_device(struct zpci_dev *);
int zpci_fmb_disable_device(struct zpci_dev *);
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index e6a2bdd4d705..df6eac9f0cb4 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -79,11 +79,11 @@ struct zpci_fib {
} __packed;
-int s390pci_mod_fc(u64 req, struct zpci_fib *fib);
-int s390pci_refresh_trans(u64 fn, u64 addr, u64 range);
-int s390pci_load(u64 *data, u64 req, u64 offset);
-int s390pci_store(u64 data, u64 req, u64 offset);
-int s390pci_store_block(const u64 *data, u64 req, u64 offset);
-void set_irq_ctrl(u16 ctl, char *unused, u8 isc);
+int zpci_mod_fc(u64 req, struct zpci_fib *fib);
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
+int zpci_load(u64 *data, u64 req, u64 offset);
+int zpci_store(u64 data, u64 req, u64 offset);
+int zpci_store_block(const u64 *data, u64 req, u64 offset);
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
#endif
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 83a9caa6ae53..d194d544d694 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -36,7 +36,7 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \
u64 data; \
int rc; \
\
- rc = s390pci_load(&data, req, ZPCI_OFFSET(addr)); \
+ rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \
if (rc) \
data = -1ULL; \
return (RETTYPE) data; \
@@ -50,7 +50,7 @@ static inline void zpci_write_##VALTYPE(VALTYPE val, \
u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \
u64 data = (VALTYPE) val; \
\
- s390pci_store(data, req, ZPCI_OFFSET(addr)); \
+ zpci_store(data, req, ZPCI_OFFSET(addr)); \
}
zpci_read(8, u64)
@@ -83,7 +83,7 @@ static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len
val = 0; /* let FW report error */
break;
}
- return s390pci_store(val, req, offset);
+ return zpci_store(val, req, offset);
}
static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
@@ -91,7 +91,7 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
u64 data;
int cc;
- cc = s390pci_load(&data, req, offset);
+ cc = zpci_load(&data, req, offset);
if (cc)
goto out;
@@ -115,7 +115,7 @@ out:
static inline int zpci_write_block(u64 req, const u64 *data, u64 offset)
{
- return s390pci_store_block(data, req, offset);
+ return zpci_store_block(data, req, offset);
}
static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 7a60bb93e83c..9b60a36c348d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -217,63 +217,57 @@ extern unsigned long MODULES_END;
/* Hardware bits in the page table entry */
#define _PAGE_CO 0x100 /* HW Change-bit override */
-#define _PAGE_RO 0x200 /* HW read-only bit */
+#define _PAGE_PROTECT 0x200 /* HW read-only bit */
#define _PAGE_INVALID 0x400 /* HW invalid bit */
+#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */
/* Software bits in the page table entry */
-#define _PAGE_SWT 0x001 /* SW pte type bit t */
-#define _PAGE_SWX 0x002 /* SW pte type bit x */
-#define _PAGE_SWC 0x004 /* SW pte changed bit */
-#define _PAGE_SWR 0x008 /* SW pte referenced bit */
-#define _PAGE_SWW 0x010 /* SW pte write bit */
-#define _PAGE_SPECIAL 0x020 /* SW associated with special page */
+#define _PAGE_PRESENT 0x001 /* SW pte present bit */
+#define _PAGE_TYPE 0x002 /* SW pte type bit */
+#define _PAGE_YOUNG 0x004 /* SW pte young bit */
+#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */
+#define _PAGE_READ 0x010 /* SW pte read bit */
+#define _PAGE_WRITE 0x020 /* SW pte write bit */
+#define _PAGE_SPECIAL 0x040 /* SW associated with special page */
#define __HAVE_ARCH_PTE_SPECIAL
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \
- _PAGE_SWC | _PAGE_SWR)
-
-/* Six different types of pages. */
-#define _PAGE_TYPE_EMPTY 0x400
-#define _PAGE_TYPE_NONE 0x401
-#define _PAGE_TYPE_SWAP 0x403
-#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */
-#define _PAGE_TYPE_RO 0x200
-#define _PAGE_TYPE_RW 0x000
-
-/*
- * Only four types for huge pages, using the invalid bit and protection bit
- * of a segment table entry.
- */
-#define _HPAGE_TYPE_EMPTY 0x020 /* _SEGMENT_ENTRY_INV */
-#define _HPAGE_TYPE_NONE 0x220
-#define _HPAGE_TYPE_RO 0x200 /* _SEGMENT_ENTRY_RO */
-#define _HPAGE_TYPE_RW 0x000
+ _PAGE_DIRTY | _PAGE_YOUNG)
/*
- * PTE type bits are rather complicated. handle_pte_fault uses pte_present,
- * pte_none and pte_file to find out the pte type WITHOUT holding the page
- * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to
- * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs
- * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards.
- * This change is done while holding the lock, but the intermediate step
- * of a previously valid pte with the hw invalid bit set can be observed by
- * handle_pte_fault. That makes it necessary that all valid pte types with
- * the hw invalid bit set must be distinguishable from the four pte types
- * empty, none, swap and file.
+ * handle_pte_fault uses pte_present, pte_none and pte_file to find out the
+ * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit
+ * is used to distinguish present from not-present ptes. It is changed only
+ * with the page table lock held.
+ *
+ * The following table gives the different possible bit combinations for
+ * the pte hardware and software bits in the last 12 bits of a pte:
*
- * irxt ipte irxt
- * _PAGE_TYPE_EMPTY 1000 -> 1000
- * _PAGE_TYPE_NONE 1001 -> 1001
- * _PAGE_TYPE_SWAP 1011 -> 1011
- * _PAGE_TYPE_FILE 11?1 -> 11?1
- * _PAGE_TYPE_RO 0100 -> 1100
- * _PAGE_TYPE_RW 0000 -> 1000
+ * 842100000000
+ * 000084210000
+ * 000000008421
+ * .IR...wrdytp
+ * empty .10...000000
+ * swap .10...xxxx10
+ * file .11...xxxxx0
+ * prot-none, clean, old .11...000001
+ * prot-none, clean, young .11...000101
+ * prot-none, dirty, old .10...001001
+ * prot-none, dirty, young .10...001101
+ * read-only, clean, old .11...010001
+ * read-only, clean, young .01...010101
+ * read-only, dirty, old .11...011001
+ * read-only, dirty, young .01...011101
+ * read-write, clean, old .11...110001
+ * read-write, clean, young .01...110101
+ * read-write, dirty, old .10...111001
+ * read-write, dirty, young .00...111101
*
- * pte_none is true for bits combinations 1000, 1010, 1100, 1110
- * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001
- * pte_file is true for bits combinations 1101, 1111
- * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
+ * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
+ * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
+ * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600
+ * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
*/
#ifndef CONFIG_64BIT
@@ -286,14 +280,25 @@ extern unsigned long MODULES_END;
#define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */
/* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */
#define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */
-#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */
-#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */
+#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
+#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */
#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */
+#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT
#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL)
-#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV)
+#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
+
+/*
+ * Segment table entry encoding (I = invalid, R = read-only bit):
+ * ..R...I.....
+ * prot-none ..1...1.....
+ * read-only ..1...0.....
+ * read-write ..0...0.....
+ * empty ..0...1.....
+ */
/* Page status table bits for virtualization */
#define PGSTE_ACC_BITS 0xf0000000UL
@@ -303,9 +308,7 @@ extern unsigned long MODULES_END;
#define PGSTE_HC_BIT 0x00200000UL
#define PGSTE_GR_BIT 0x00040000UL
#define PGSTE_GC_BIT 0x00020000UL
-#define PGSTE_UR_BIT 0x00008000UL
-#define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */
-#define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */
+#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */
#else /* CONFIG_64BIT */
@@ -324,8 +327,8 @@ extern unsigned long MODULES_END;
/* Bits in the region table entry */
#define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */
-#define _REGION_ENTRY_RO 0x200 /* region protection bit */
-#define _REGION_ENTRY_INV 0x20 /* invalid region table entry */
+#define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */
+#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */
#define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */
#define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */
#define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */
@@ -333,29 +336,47 @@ extern unsigned long MODULES_END;
#define _REGION_ENTRY_LENGTH 0x03 /* region third length */
#define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
-#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV)
+#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
#define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
-#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV)
+#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
-#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV)
+#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */
#define _REGION3_ENTRY_RO 0x200 /* page protection bit */
#define _REGION3_ENTRY_CO 0x100 /* change-recording override */
/* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
+#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */
-#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */
-#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */
+#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
+#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY (0)
-#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV)
+#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
+#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */
+#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */
+#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG
+
+/*
+ * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
+ * ..R...I...y.
+ * prot-none, old ..0...1...1.
+ * prot-none, young ..1...1...1.
+ * read-only, old ..1...1...0.
+ * read-only, young ..1...0...1.
+ * read-write, old ..0...1...0.
+ * read-write, young ..0...0...1.
+ * The segment table origin is used to distinguish empty (origin==0) from
+ * read-write, old segment table entries (origin!=0)
+ */
+
#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */
-#define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT)
/* Set of bits not changed in pmd_modify */
#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
@@ -369,9 +390,7 @@ extern unsigned long MODULES_END;
#define PGSTE_HC_BIT 0x0020000000000000UL
#define PGSTE_GR_BIT 0x0004000000000000UL
#define PGSTE_GC_BIT 0x0002000000000000UL
-#define PGSTE_UR_BIT 0x0000800000000000UL
-#define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */
-#define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */
+#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */
#endif /* CONFIG_64BIT */
@@ -386,14 +405,18 @@ extern unsigned long MODULES_END;
/*
* Page protection definitions.
*/
-#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE)
-#define PAGE_RO __pgprot(_PAGE_TYPE_RO)
-#define PAGE_RW __pgprot(_PAGE_TYPE_RO | _PAGE_SWW)
-#define PAGE_RWC __pgprot(_PAGE_TYPE_RW | _PAGE_SWW | _PAGE_SWC)
-
-#define PAGE_KERNEL PAGE_RWC
-#define PAGE_SHARED PAGE_KERNEL
-#define PAGE_COPY PAGE_RO
+#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID)
+#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \
+ _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_INVALID | _PAGE_PROTECT)
+
+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_YOUNG | _PAGE_DIRTY)
+#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_YOUNG | _PAGE_DIRTY)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
+ _PAGE_PROTECT)
/*
* On s390 the page table entry has an invalid bit and a read-only bit.
@@ -402,35 +425,31 @@ extern unsigned long MODULES_END;
*/
/*xwr*/
#define __P000 PAGE_NONE
-#define __P001 PAGE_RO
-#define __P010 PAGE_RO
-#define __P011 PAGE_RO
-#define __P100 PAGE_RO
-#define __P101 PAGE_RO
-#define __P110 PAGE_RO
-#define __P111 PAGE_RO
+#define __P001 PAGE_READ
+#define __P010 PAGE_READ
+#define __P011 PAGE_READ
+#define __P100 PAGE_READ
+#define __P101 PAGE_READ
+#define __P110 PAGE_READ
+#define __P111 PAGE_READ
#define __S000 PAGE_NONE
-#define __S001 PAGE_RO
-#define __S010 PAGE_RW
-#define __S011 PAGE_RW
-#define __S100 PAGE_RO
-#define __S101 PAGE_RO
-#define __S110 PAGE_RW
-#define __S111 PAGE_RW
+#define __S001 PAGE_READ
+#define __S010 PAGE_WRITE
+#define __S011 PAGE_WRITE
+#define __S100 PAGE_READ
+#define __S101 PAGE_READ
+#define __S110 PAGE_WRITE
+#define __S111 PAGE_WRITE
/*
* Segment entry (large page) protection definitions.
*/
-#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE)
-#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO)
-#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW)
-
-static inline int mm_exclusive(struct mm_struct *mm)
-{
- return likely(mm == current->active_mm &&
- atomic_read(&mm->context.attach_count) <= 1);
-}
+#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
+ _SEGMENT_ENTRY_NONE)
+#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \
+ _SEGMENT_ENTRY_PROTECT)
+#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID)
static inline int mm_has_pgste(struct mm_struct *mm)
{
@@ -467,7 +486,7 @@ static inline int pgd_none(pgd_t pgd)
{
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
return 0;
- return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL;
+ return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
}
static inline int pgd_bad(pgd_t pgd)
@@ -478,7 +497,7 @@ static inline int pgd_bad(pgd_t pgd)
* invalid for either table entry.
*/
unsigned long mask =
- ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
+ ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
return (pgd_val(pgd) & mask) != 0;
}
@@ -494,7 +513,7 @@ static inline int pud_none(pud_t pud)
{
if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
return 0;
- return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL;
+ return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL;
}
static inline int pud_large(pud_t pud)
@@ -512,7 +531,7 @@ static inline int pud_bad(pud_t pud)
* invalid for either table entry.
*/
unsigned long mask =
- ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
+ ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
return (pud_val(pud) & mask) != 0;
}
@@ -521,30 +540,36 @@ static inline int pud_bad(pud_t pud)
static inline int pmd_present(pmd_t pmd)
{
- unsigned long mask = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO;
- return (pmd_val(pmd) & mask) == _HPAGE_TYPE_NONE ||
- !(pmd_val(pmd) & _SEGMENT_ENTRY_INV);
+ return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID;
}
static inline int pmd_none(pmd_t pmd)
{
- return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) &&
- !(pmd_val(pmd) & _SEGMENT_ENTRY_RO);
+ return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID;
}
static inline int pmd_large(pmd_t pmd)
{
#ifdef CONFIG_64BIT
- return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
#else
return 0;
#endif
}
+static inline int pmd_prot_none(pmd_t pmd)
+{
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) &&
+ (pmd_val(pmd) & _SEGMENT_ENTRY_NONE);
+}
+
static inline int pmd_bad(pmd_t pmd)
{
- unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV;
- return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY;
+#ifdef CONFIG_64BIT
+ if (pmd_large(pmd))
+ return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
+#endif
+ return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
}
#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
@@ -563,31 +588,40 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
#define __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
- return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0;
+ if (pmd_prot_none(pmd))
+ return 0;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0;
}
static inline int pmd_young(pmd_t pmd)
{
- return 0;
+ int young = 0;
+#ifdef CONFIG_64BIT
+ if (pmd_prot_none(pmd))
+ young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0;
+ else
+ young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
+#endif
+ return young;
}
-static inline int pte_none(pte_t pte)
+static inline int pte_present(pte_t pte)
{
- return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT);
+ /* Bit pattern: (pte & 0x001) == 0x001 */
+ return (pte_val(pte) & _PAGE_PRESENT) != 0;
}
-static inline int pte_present(pte_t pte)
+static inline int pte_none(pte_t pte)
{
- unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX;
- return (pte_val(pte) & mask) == _PAGE_TYPE_NONE ||
- (!(pte_val(pte) & _PAGE_INVALID) &&
- !(pte_val(pte) & _PAGE_SWT));
+ /* Bit pattern: pte == 0x400 */
+ return pte_val(pte) == _PAGE_INVALID;
}
static inline int pte_file(pte_t pte)
{
- unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT;
- return (pte_val(pte) & mask) == _PAGE_TYPE_FILE;
+ /* Bit pattern: (pte & 0x601) == 0x600 */
+ return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT))
+ == (_PAGE_INVALID | _PAGE_PROTECT);
}
static inline int pte_special(pte_t pte)
@@ -634,6 +668,15 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
#endif
}
+static inline pgste_t pgste_get(pte_t *ptep)
+{
+ unsigned long pgste = 0;
+#ifdef CONFIG_PGSTE
+ pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
+#endif
+ return __pgste(pgste);
+}
+
static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
@@ -644,33 +687,28 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste)
static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
- unsigned long address, bits;
- unsigned char skey;
+ unsigned long address, bits, skey;
if (pte_val(*ptep) & _PAGE_INVALID)
return pgste;
address = pte_val(*ptep) & PAGE_MASK;
- skey = page_get_storage_key(address);
+ skey = (unsigned long) page_get_storage_key(address);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
- /* Clear page changed & referenced bit in the storage key */
- if (bits & _PAGE_CHANGED)
+ if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
+ /* Transfer dirty + referenced bit to host bits in pgste */
+ pgste_val(pgste) |= bits << 52;
page_set_storage_key(address, skey ^ bits, 0);
- else if (bits)
+ } else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
+ (bits & _PAGE_REFERENCED)) {
+ /* Transfer referenced bit to host bit in pgste */
+ pgste_val(pgste) |= PGSTE_HR_BIT;
page_reset_referenced(address);
+ }
/* Transfer page changed & referenced bit to guest bits in pgste */
pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
- /* Get host changed & referenced bits from pgste */
- bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52;
- /* Transfer page changed & referenced bit to kvm user bits */
- pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */
- /* Clear relevant host bits in pgste. */
- pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT);
- pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
/* Copy page access key and fetch protection bit to pgste */
- pgste_val(pgste) |=
- (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
- /* Transfer referenced bit to pte */
- pte_val(*ptep) |= (bits & _PAGE_REFERENCED) << 1;
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
return pgste;
@@ -679,24 +717,11 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
- int young;
-
if (pte_val(*ptep) & _PAGE_INVALID)
return pgste;
/* Get referenced bit from storage key */
- young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
- if (young)
- pgste_val(pgste) |= PGSTE_GR_BIT;
- /* Get host referenced bit from pgste */
- if (pgste_val(pgste) & PGSTE_HR_BIT) {
- pgste_val(pgste) &= ~PGSTE_HR_BIT;
- young = 1;
- }
- /* Transfer referenced bit to kvm user bits and pte */
- if (young) {
- pgste_val(pgste) |= PGSTE_UR_BIT;
- pte_val(*ptep) |= _PAGE_SWR;
- }
+ if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
+ pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
#endif
return pgste;
}
@@ -723,13 +748,13 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
{
- if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_SWW)) {
+ if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_WRITE)) {
/*
* Without enhanced suppression-on-protection force
* the dirty bit on for all writable ptes.
*/
- pte_val(entry) |= _PAGE_SWC;
- pte_val(entry) &= ~_PAGE_RO;
+ pte_val(entry) |= _PAGE_DIRTY;
+ pte_val(entry) &= ~_PAGE_PROTECT;
}
*ptep = entry;
}
@@ -841,21 +866,17 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
*/
static inline int pte_write(pte_t pte)
{
- return (pte_val(pte) & _PAGE_SWW) != 0;
+ return (pte_val(pte) & _PAGE_WRITE) != 0;
}
static inline int pte_dirty(pte_t pte)
{
- return (pte_val(pte) & _PAGE_SWC) != 0;
+ return (pte_val(pte) & _PAGE_DIRTY) != 0;
}
static inline int pte_young(pte_t pte)
{
-#ifdef CONFIG_PGSTE
- if (pte_val(pte) & _PAGE_SWR)
- return 1;
-#endif
- return 0;
+ return (pte_val(pte) & _PAGE_YOUNG) != 0;
}
/*
@@ -880,12 +901,12 @@ static inline void pud_clear(pud_t *pud)
static inline void pmd_clear(pmd_t *pmdp)
{
- pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID;
}
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ pte_val(*ptep) = _PAGE_INVALID;
}
/*
@@ -896,55 +917,63 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
pte_val(pte) &= _PAGE_CHG_MASK;
pte_val(pte) |= pgprot_val(newprot);
- if ((pte_val(pte) & _PAGE_SWC) && (pte_val(pte) & _PAGE_SWW))
- pte_val(pte) &= ~_PAGE_RO;
+ /*
+ * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the
+ * invalid bit set, clear it again for readable, young pages
+ */
+ if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
+ pte_val(pte) &= ~_PAGE_INVALID;
+ /*
+ * newprot for PAGE_READ and PAGE_WRITE has the page protection
+ * bit set, clear it again for writable, dirty pages
+ */
+ if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
+ pte_val(pte) &= ~_PAGE_PROTECT;
return pte;
}
static inline pte_t pte_wrprotect(pte_t pte)
{
- pte_val(pte) &= ~_PAGE_SWW;
- /* Do not clobber _PAGE_TYPE_NONE pages! */
- if (!(pte_val(pte) & _PAGE_INVALID))
- pte_val(pte) |= _PAGE_RO;
+ pte_val(pte) &= ~_PAGE_WRITE;
+ pte_val(pte) |= _PAGE_PROTECT;
return pte;
}
static inline pte_t pte_mkwrite(pte_t pte)
{
- pte_val(pte) |= _PAGE_SWW;
- if (pte_val(pte) & _PAGE_SWC)
- pte_val(pte) &= ~_PAGE_RO;
+ pte_val(pte) |= _PAGE_WRITE;
+ if (pte_val(pte) & _PAGE_DIRTY)
+ pte_val(pte) &= ~_PAGE_PROTECT;
return pte;
}
static inline pte_t pte_mkclean(pte_t pte)
{
- pte_val(pte) &= ~_PAGE_SWC;
- /* Do not clobber _PAGE_TYPE_NONE pages! */
- if (!(pte_val(pte) & _PAGE_INVALID))
- pte_val(pte) |= _PAGE_RO;
+ pte_val(pte) &= ~_PAGE_DIRTY;
+ pte_val(pte) |= _PAGE_PROTECT;
return pte;
}
static inline pte_t pte_mkdirty(pte_t pte)
{
- pte_val(pte) |= _PAGE_SWC;
- if (pte_val(pte) & _PAGE_SWW)
- pte_val(pte) &= ~_PAGE_RO;
+ pte_val(pte) |= _PAGE_DIRTY;
+ if (pte_val(pte) & _PAGE_WRITE)
+ pte_val(pte) &= ~_PAGE_PROTECT;
return pte;
}
static inline pte_t pte_mkold(pte_t pte)
{
-#ifdef CONFIG_PGSTE
- pte_val(pte) &= ~_PAGE_SWR;
-#endif
+ pte_val(pte) &= ~_PAGE_YOUNG;
+ pte_val(pte) |= _PAGE_INVALID;
return pte;
}
static inline pte_t pte_mkyoung(pte_t pte)
{
+ pte_val(pte) |= _PAGE_YOUNG;
+ if (pte_val(pte) & _PAGE_READ)
+ pte_val(pte) &= ~_PAGE_INVALID;
return pte;
}
@@ -957,7 +986,7 @@ static inline pte_t pte_mkspecial(pte_t pte)
#ifdef CONFIG_HUGETLB_PAGE
static inline pte_t pte_mkhuge(pte_t pte)
{
- pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO);
+ pte_val(pte) |= _PAGE_LARGE;
return pte;
}
#endif
@@ -974,8 +1003,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_update_all(ptep, pgste);
- dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
- pgste_val(pgste) &= ~PGSTE_UC_BIT;
+ dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
+ pgste_val(pgste) &= ~PGSTE_HC_BIT;
pgste_set_unlock(ptep, pgste);
return dirty;
}
@@ -994,59 +1023,75 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_update_young(ptep, pgste);
- young = !!(pgste_val(pgste) & PGSTE_UR_BIT);
- pgste_val(pgste) &= ~PGSTE_UR_BIT;
+ young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
+ pgste_val(pgste) &= ~PGSTE_HR_BIT;
pgste_set_unlock(ptep, pgste);
}
return young;
}
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
+{
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+#ifndef CONFIG_64BIT
+ /* pto must point to the start of the segment table */
+ pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
+#else
+ /* ipte in zarch mode can do the math */
+ pte_t *pto = ptep;
+#endif
+ asm volatile(
+ " ipte %2,%3"
+ : "=m" (*ptep) : "m" (*ptep),
+ "a" (pto), "a" (address));
+ }
+}
+
+static inline void ptep_flush_lazy(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ int active = (mm == current->active_mm) ? 1 : 0;
+
+ if (atomic_read(&mm->context.attach_count) > active)
+ __ptep_ipte(address, ptep);
+ else
+ mm->context.flush_mm = 1;
+}
+
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
pgste_t pgste;
pte_t pte;
+ int young;
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_update_young(ptep, pgste);
- pte = *ptep;
- *ptep = pte_mkold(pte);
- pgste_set_unlock(ptep, pgste);
- return pte_young(pte);
+ pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
}
- return 0;
+
+ pte = *ptep;
+ __ptep_ipte(addr, ptep);
+ young = pte_young(pte);
+ pte = pte_mkold(pte);
+
+ if (mm_has_pgste(vma->vm_mm)) {
+ pgste_set_pte(ptep, pte);
+ pgste_set_unlock(ptep, pgste);
+ } else
+ *ptep = pte;
+
+ return young;
}
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- /* No need to flush TLB
- * On s390 reference bits are in storage key and never in TLB
- * With virtualization we handle the reference bit, without we
- * we can simply return */
return ptep_test_and_clear_young(vma, address, ptep);
}
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
-{
- if (!(pte_val(*ptep) & _PAGE_INVALID)) {
-#ifndef CONFIG_64BIT
- /* pto must point to the start of the segment table */
- pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
-#else
- /* ipte in zarch mode can do the math */
- pte_t *pto = ptep;
-#endif
- asm volatile(
- " ipte %2,%3"
- : "=m" (*ptep) : "m" (*ptep),
- "a" (pto), "a" (address));
- }
-}
-
/*
* This is hard to understand. ptep_get_and_clear and ptep_clear_flush
* both clear the TLB for the unmapped pte. The reason is that
@@ -1067,16 +1112,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
pgste_t pgste;
pte_t pte;
- mm->context.flush_mm = 1;
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_ipte_notify(mm, address, ptep, pgste);
}
pte = *ptep;
- if (!mm_exclusive(mm))
- __ptep_ipte(address, ptep);
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ ptep_flush_lazy(mm, address, ptep);
+ pte_val(*ptep) = _PAGE_INVALID;
if (mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
@@ -1093,15 +1136,14 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
pgste_t pgste;
pte_t pte;
- mm->context.flush_mm = 1;
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste_ipte_notify(mm, address, ptep, pgste);
}
pte = *ptep;
- if (!mm_exclusive(mm))
- __ptep_ipte(address, ptep);
+ ptep_flush_lazy(mm, address, ptep);
+ pte_val(*ptep) |= _PAGE_INVALID;
if (mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
@@ -1117,7 +1159,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
pgste_t pgste;
if (mm_has_pgste(mm)) {
- pgste = *(pgste_t *)(ptep + PTRS_PER_PTE);
+ pgste = pgste_get(ptep);
pgste_set_key(ptep, pgste, pte);
pgste_set_pte(ptep, pte);
pgste_set_unlock(ptep, pgste);
@@ -1139,7 +1181,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
pte = *ptep;
__ptep_ipte(address, ptep);
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ pte_val(*ptep) = _PAGE_INVALID;
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_update_all(&pte, pgste);
@@ -1163,18 +1205,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
pgste_t pgste;
pte_t pte;
- if (mm_has_pgste(mm)) {
+ if (!full && mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- if (!full)
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, address, ptep, pgste);
}
pte = *ptep;
if (!full)
- __ptep_ipte(address, ptep);
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ ptep_flush_lazy(mm, address, ptep);
+ pte_val(*ptep) = _PAGE_INVALID;
- if (mm_has_pgste(mm)) {
+ if (!full && mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
pgste_set_unlock(ptep, pgste);
}
@@ -1189,14 +1230,12 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
pte_t pte = *ptep;
if (pte_write(pte)) {
- mm->context.flush_mm = 1;
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_ipte_notify(mm, address, ptep, pgste);
}
- if (!mm_exclusive(mm))
- __ptep_ipte(address, ptep);
+ ptep_flush_lazy(mm, address, ptep);
pte = pte_wrprotect(pte);
if (mm_has_pgste(mm)) {
@@ -1240,7 +1279,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
{
pte_t __pte;
pte_val(__pte) = physpage + pgprot_val(pgprot);
- return __pte;
+ return pte_mkyoung(__pte);
}
static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
@@ -1248,10 +1287,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
unsigned long physpage = page_to_phys(page);
pte_t __pte = mk_pte_phys(physpage, pgprot);
- if ((pte_val(__pte) & _PAGE_SWW) && PageDirty(page)) {
- pte_val(__pte) |= _PAGE_SWC;
- pte_val(__pte) &= ~_PAGE_RO;
- }
+ if (pte_write(__pte) && PageDirty(page))
+ __pte = pte_mkdirty(__pte);
return __pte;
}
@@ -1313,7 +1350,7 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
unsigned long sto = (unsigned long) pmdp -
pmd_index(address) * sizeof(pmd_t);
- if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
+ if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) {
asm volatile(
" .insn rrf,0xb98e0000,%2,%3,0,0"
: "=m" (*pmdp)
@@ -1324,24 +1361,68 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
}
}
+static inline void __pmd_csp(pmd_t *pmdp)
+{
+ register unsigned long reg2 asm("2") = pmd_val(*pmdp);
+ register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
+ _SEGMENT_ENTRY_INVALID;
+ register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
+
+ asm volatile(
+ " csp %1,%3"
+ : "=m" (*pmdp)
+ : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+}
+
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
{
/*
- * pgprot is PAGE_NONE, PAGE_RO, or PAGE_RW (see __Pxxx / __Sxxx)
+ * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx)
* Convert to segment table entry format.
*/
if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
return pgprot_val(SEGMENT_NONE);
- if (pgprot_val(pgprot) == pgprot_val(PAGE_RO))
- return pgprot_val(SEGMENT_RO);
- return pgprot_val(SEGMENT_RW);
+ if (pgprot_val(pgprot) == pgprot_val(PAGE_READ))
+ return pgprot_val(SEGMENT_READ);
+ return pgprot_val(SEGMENT_WRITE);
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+ if (pmd_prot_none(pmd)) {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ } else {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
+ }
+#endif
+ return pmd;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+ if (pmd_prot_none(pmd)) {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ } else {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ }
+#endif
+ return pmd;
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
+ int young;
+
+ young = pmd_young(pmd);
pmd_val(pmd) &= _SEGMENT_CHG_MASK;
pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ if (young)
+ pmd = pmd_mkyoung(pmd);
return pmd;
}
@@ -1349,14 +1430,14 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
{
pmd_t __pmd;
pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
- return __pmd;
+ return pmd_mkyoung(__pmd);
}
static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
- /* Do not clobber _HPAGE_TYPE_NONE pages! */
- if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INV))
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO;
+ /* Do not clobber PROT_NONE segments! */
+ if (!pmd_prot_none(pmd))
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
@@ -1389,7 +1470,7 @@ static inline int pmd_trans_splitting(pmd_t pmd)
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t entry)
{
- if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1)
+ if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1)
pmd_val(entry) |= _SEGMENT_ENTRY_CO;
*pmdp = entry;
}
@@ -1402,7 +1483,9 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
- pmd_val(pmd) |= _SEGMENT_ENTRY_RO;
+ /* Do not clobber PROT_NONE segments! */
+ if (!pmd_prot_none(pmd))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
return pmd;
}
@@ -1412,50 +1495,16 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
return pmd;
}
-static inline pmd_t pmd_mkold(pmd_t pmd)
-{
- /* No referenced bit in the segment table entry. */
- return pmd;
-}
-
-static inline pmd_t pmd_mkyoung(pmd_t pmd)
-{
- /* No referenced bit in the segment table entry. */
- return pmd;
-}
-
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
- unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK;
- long tmp, rc;
- int counter;
+ pmd_t pmd;
- rc = 0;
- if (MACHINE_HAS_RRBM) {
- counter = PTRS_PER_PTE >> 6;
- asm volatile(
- "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */
- " ogr %1,%0\n"
- " la %3,0(%4,%3)\n"
- " brct %2,0b\n"
- : "=&d" (tmp), "+&d" (rc), "+d" (counter),
- "+a" (pmd_addr)
- : "a" (64 * 4096UL) : "cc");
- rc = !!rc;
- } else {
- counter = PTRS_PER_PTE;
- asm volatile(
- "0: rrbe 0,%2\n"
- " la %2,0(%3,%2)\n"
- " brc 12,1f\n"
- " lhi %0,1\n"
- "1: brct %1,0b\n"
- : "+d" (rc), "+d" (counter), "+a" (pmd_addr)
- : "a" (4096UL) : "cc");
- }
- return rc;
+ pmd = *pmdp;
+ __pmd_idte(address, pmdp);
+ *pmdp = pmd_mkold(pmd);
+ return pmd_young(pmd);
}
#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
@@ -1521,10 +1570,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
* exception will occur instead of a page translation exception. The
* specification exception has the bad habit not to store necessary
* information in the lowcore.
- * Bit 21 and bit 22 are the page invalid bit and the page protection
- * bit. We set both to indicate a swapped page.
- * Bit 30 and 31 are used to distinguish the different page types. For
- * a swapped page these bits need to be zero.
+ * Bits 21, 22, 30 and 31 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
* This leaves the bits 1-19 and bits 24-29 to store type and offset.
* We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
* plus 24 for the offset.
@@ -1538,10 +1585,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
* exception will occur instead of a page translation exception. The
* specification exception has the bad habit not to store necessary
* information in the lowcore.
- * Bit 53 and bit 54 are the page invalid bit and the page protection
- * bit. We set both to indicate a swapped page.
- * Bit 62 and 63 are used to distinguish the different page types. For
- * a swapped page these bits need to be zero.
+ * Bits 53, 54, 62 and 63 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
* This leaves the bits 0-51 and bits 56-61 to store type and offset.
* We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
* plus 56 for the offset.
@@ -1558,7 +1603,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
{
pte_t pte;
offset &= __SWP_OFFSET_MASK;
- pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) |
+ pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) |
((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
return pte;
}
@@ -1581,7 +1626,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
#define pgoff_to_pte(__off) \
((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \
- | _PAGE_TYPE_FILE })
+ | _PAGE_INVALID | _PAGE_PROTECT })
#endif /* !__ASSEMBLY__ */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 83c85c217f5c..0eb37505cab1 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -93,7 +93,15 @@ struct thread_struct {
#endif
};
-#define PER_FLAG_NO_TE 1UL /* Flag to disable transactions. */
+/* Flag to disable transactions. */
+#define PER_FLAG_NO_TE 1UL
+/* Flag to enable random transaction aborts. */
+#define PER_FLAG_TE_ABORT_RAND 2UL
+/* Flag to specify random transaction abort mode:
+ * - abort each transaction at a random instruction before TEND if set.
+ * - abort random transactions at a random instruction if cleared.
+ */
+#define PER_FLAG_TE_ABORT_RAND_TEND 4UL
typedef struct thread_struct thread_struct;
diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h
new file mode 100644
index 000000000000..5b3e48ef534b
--- /dev/null
+++ b/arch/s390/include/asm/serial.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_S390_SERIAL_H
+#define _ASM_S390_SERIAL_H
+
+#define BASE_BAUD 0
+
+#endif /* _ASM_S390_SERIAL_H */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index f3a9e0f92704..6dbd559763c9 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -8,9 +8,10 @@
#define __ASM_SWITCH_TO_H
#include <linux/thread_info.h>
+#include <asm/ptrace.h>
extern struct task_struct *__switch_to(void *, void *);
-extern void update_per_regs(struct task_struct *task);
+extern void update_cr_regs(struct task_struct *task);
static inline void save_fp_regs(s390_fp_regs *fpregs)
{
@@ -68,12 +69,16 @@ static inline void restore_fp_regs(s390_fp_regs *fpregs)
static inline void save_access_regs(unsigned int *acrs)
{
- asm volatile("stam 0,15,%0" : "=Q" (*acrs));
+ typedef struct { int _[NUM_ACRS]; } acrstype;
+
+ asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs));
}
static inline void restore_access_regs(unsigned int *acrs)
{
- asm volatile("lam 0,15,%0" : : "Q" (*acrs));
+ typedef struct { int _[NUM_ACRS]; } acrstype;
+
+ asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
}
#define switch_to(prev,next,last) do { \
@@ -86,7 +91,7 @@ static inline void restore_access_regs(unsigned int *acrs)
restore_fp_regs(&next->thread.fp_regs); \
restore_access_regs(&next->thread.acrs[0]); \
restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
- update_per_regs(next); \
+ update_cr_regs(next); \
} \
prev = __switch_to(prev,next); \
} while (0)
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index b75d7d686684..2cb846c4b37f 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -32,6 +32,7 @@ struct mmu_gather {
struct mm_struct *mm;
struct mmu_table_batch *batch;
unsigned int fullmm;
+ unsigned long start, end;
};
struct mmu_table_batch {
@@ -48,10 +49,13 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
static inline void tlb_gather_mmu(struct mmu_gather *tlb,
struct mm_struct *mm,
- unsigned int full_mm_flush)
+ unsigned long start,
+ unsigned long end)
{
tlb->mm = mm;
- tlb->fullmm = full_mm_flush;
+ tlb->start = start;
+ tlb->end = end;
+ tlb->fullmm = !(start | (end+1));
tlb->batch = NULL;
if (tlb->fullmm)
__tlb_flush_mm(mm);
@@ -59,13 +63,14 @@ static inline void tlb_gather_mmu(struct mmu_gather *tlb,
static inline void tlb_flush_mmu(struct mmu_gather *tlb)
{
+ __tlb_flush_mm_lazy(tlb->mm);
tlb_table_flush(tlb);
}
static inline void tlb_finish_mmu(struct mmu_gather *tlb,
unsigned long start, unsigned long end)
{
- tlb_table_flush(tlb);
+ tlb_flush_mmu(tlb);
}
/*
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 6b32af30878c..f9fef0425fee 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -86,7 +86,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
__tlb_flush_full(mm);
}
-static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
+static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
{
if (mm->context.flush_mm) {
__tlb_flush_mm(mm);
@@ -118,13 +118,13 @@ static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
static inline void flush_tlb_mm(struct mm_struct *mm)
{
- __tlb_flush_mm_cond(mm);
+ __tlb_flush_mm_lazy(mm);
}
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- __tlb_flush_mm_cond(vma->vm_mm);
+ __tlb_flush_mm_lazy(vma->vm_mm);
}
static inline void flush_tlb_kernel_range(unsigned long start,
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
new file mode 100644
index 000000000000..af9896c53eb3
--- /dev/null
+++ b/arch/s390/include/asm/vtime.h
@@ -0,0 +1,7 @@
+#ifndef _S390_VTIME_H
+#define _S390_VTIME_H
+
+#define __ARCH_HAS_VTIME_ACCOUNT
+#define __ARCH_HAS_VTIME_TASK_SWITCH
+
+#endif /* _S390_VTIME_H */
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
index 3aa9f1ec5b29..7a84619e315e 100644
--- a/arch/s390/include/uapi/asm/ptrace.h
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -400,6 +400,7 @@ typedef struct
#define PTRACE_POKE_SYSTEM_CALL 0x5008
#define PTRACE_ENABLE_TE 0x5009
#define PTRACE_DISABLE_TE 0x5010
+#define PTRACE_TE_ABORT_RAND 0x5011
/*
* PT_PROT definition is loosely based on hppa bsd definition in
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 64b24650e4f8..dd62071624be 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -173,7 +173,7 @@ error:
}
}
-static struct cache_dir *__cpuinit cache_create_cache_dir(int cpu)
+static struct cache_dir *cache_create_cache_dir(int cpu)
{
struct cache_dir *cache_dir;
struct kobject *kobj = NULL;
@@ -289,9 +289,8 @@ static struct kobj_type cache_index_type = {
.default_attrs = cache_index_default_attrs,
};
-static int __cpuinit cache_create_index_dir(struct cache_dir *cache_dir,
- struct cache *cache, int index,
- int cpu)
+static int cache_create_index_dir(struct cache_dir *cache_dir,
+ struct cache *cache, int index, int cpu)
{
struct cache_index_dir *index_dir;
int rc;
@@ -313,7 +312,7 @@ out:
return rc;
}
-static int __cpuinit cache_add_cpu(int cpu)
+static int cache_add_cpu(int cpu)
{
struct cache_dir *cache_dir;
struct cache *cache;
@@ -335,7 +334,7 @@ static int __cpuinit cache_add_cpu(int cpu)
return 0;
}
-static void __cpuinit cache_remove_cpu(int cpu)
+static void cache_remove_cpu(int cpu)
{
struct cache_index_dir *index, *next;
struct cache_dir *cache_dir;
@@ -354,8 +353,8 @@ static void __cpuinit cache_remove_cpu(int cpu)
cache_dir_cpu[cpu] = NULL;
}
-static int __cpuinit cache_hotplug(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int cache_hotplug(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
{
int cpu = (long)hcpu;
int rc = 0;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index f703d91bf720..d8f355657171 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -21,6 +21,48 @@
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
+
+/*
+ * Return physical address for virtual address
+ */
+static inline void *load_real_addr(void *addr)
+{
+ unsigned long real_addr;
+
+ asm volatile(
+ " lra %0,0(%1)\n"
+ " jz 0f\n"
+ " la %0,0\n"
+ "0:"
+ : "=a" (real_addr) : "a" (addr) : "cc");
+ return (void *)real_addr;
+}
+
+/*
+ * Copy up to one page to vmalloc or real memory
+ */
+static ssize_t copy_page_real(void *buf, void *src, size_t csize)
+{
+ size_t size;
+
+ if (is_vmalloc_addr(buf)) {
+ BUG_ON(csize >= PAGE_SIZE);
+ /* If buf is not page aligned, copy first part */
+ size = min(roundup(__pa(buf), PAGE_SIZE) - __pa(buf), csize);
+ if (size) {
+ if (memcpy_real(load_real_addr(buf), src, size))
+ return -EFAULT;
+ buf += size;
+ src += size;
+ }
+ /* Copy second part */
+ size = csize - size;
+ return (size) ? memcpy_real(load_real_addr(buf), src, size) : 0;
+ } else {
+ return memcpy_real(buf, src, csize);
+ }
+}
+
/*
* Copy one page from "oldmem"
*
@@ -32,6 +74,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
size_t csize, unsigned long offset, int userbuf)
{
unsigned long src;
+ int rc;
if (!csize)
return 0;
@@ -43,11 +86,11 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
src < OLDMEM_BASE + OLDMEM_SIZE)
src -= OLDMEM_BASE;
if (userbuf)
- copy_to_user_real((void __force __user *) buf, (void *) src,
- csize);
+ rc = copy_to_user_real((void __force __user *) buf,
+ (void *) src, csize);
else
- memcpy_real(buf, (void *) src, csize);
- return csize;
+ rc = copy_page_real(buf, (void *) src, csize);
+ return (rc == 0) ? csize : rc;
}
/*
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index be7a408be7a1..cc30d1fb000c 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -18,6 +18,7 @@
#include <asm/unistd.h>
#include <asm/page.h>
#include <asm/sigp.h>
+#include <asm/irq.h>
__PT_R0 = __PT_GPRS
__PT_R1 = __PT_GPRS + 4
@@ -435,6 +436,11 @@ io_skip:
io_loop:
l %r1,BASED(.Ldo_IRQ)
lr %r2,%r11 # pass pointer to pt_regs
+ lhi %r3,IO_INTERRUPT
+ tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
+ jz io_call
+ lhi %r3,THIN_INTERRUPT
+io_call:
basr %r14,%r1 # call do_IRQ
tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR
jz io_return
@@ -584,9 +590,10 @@ ext_skip:
mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
TRACE_IRQS_OFF
+ l %r1,BASED(.Ldo_IRQ)
lr %r2,%r11 # pass pointer to pt_regs
- l %r1,BASED(.Ldo_extint)
- basr %r14,%r1 # call do_extint
+ lhi %r3,EXT_INTERRUPT
+ basr %r14,%r1 # call do_IRQ
j io_return
/*
@@ -879,13 +886,13 @@ cleanup_idle:
stm %r9,%r10,__LC_SYSTEM_TIMER
mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
# prepare return psw
- n %r8,BASED(cleanup_idle_wait) # clear wait state bit
+ n %r8,BASED(cleanup_idle_wait) # clear irq & wait state bits
l %r9,24(%r11) # return from psw_idle
br %r14
cleanup_idle_insn:
.long psw_idle_lpsw + 0x80000000
cleanup_idle_wait:
- .long 0xfffdffff
+ .long 0xfcfdffff
/*
* Integer constants
@@ -902,7 +909,6 @@ cleanup_idle_wait:
.Ldo_machine_check: .long s390_do_machine_check
.Lhandle_mcck: .long s390_handle_mcck
.Ldo_IRQ: .long do_IRQ
-.Ldo_extint: .long do_extint
.Ldo_signal: .long do_signal
.Ldo_notify_resume: .long do_notify_resume
.Ldo_per_trap: .long do_per_trap
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 1c039d0c24c7..2b2188b97c6a 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -19,6 +19,7 @@
#include <asm/unistd.h>
#include <asm/page.h>
#include <asm/sigp.h>
+#include <asm/irq.h>
__PT_R0 = __PT_GPRS
__PT_R1 = __PT_GPRS + 8
@@ -468,6 +469,11 @@ io_skip:
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
io_loop:
lgr %r2,%r11 # pass pointer to pt_regs
+ lghi %r3,IO_INTERRUPT
+ tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
+ jz io_call
+ lghi %r3,THIN_INTERRUPT
+io_call:
brasl %r14,do_IRQ
tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR
jz io_return
@@ -623,7 +629,8 @@ ext_skip:
TRACE_IRQS_OFF
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
- brasl %r14,do_extint
+ lghi %r3,EXT_INTERRUPT
+ brasl %r14,do_IRQ
j io_return
/*
@@ -922,7 +929,7 @@ cleanup_idle:
stg %r9,__LC_SYSTEM_TIMER
mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
# prepare return psw
- nihh %r8,0xfffd # clear wait state bit
+ nihh %r8,0xfcfd # clear irq & wait state bits
lg %r9,48(%r11) # return from psw_idle
br %r14
cleanup_idle_insn:
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 54b0995514e8..b34ba0ea96a9 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -22,6 +22,7 @@
#include <asm/cputime.h>
#include <asm/lowcore.h>
#include <asm/irq.h>
+#include <asm/hw_irq.h>
#include "entry.h"
DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
@@ -42,9 +43,10 @@ struct irq_class {
* Since the external and I/O interrupt fields are already sums we would end
* up with having a sum which accounts each interrupt twice.
*/
-static const struct irq_class irqclass_main_desc[NR_IRQS] = {
- [EXTERNAL_INTERRUPT] = {.name = "EXT"},
- [IO_INTERRUPT] = {.name = "I/O"}
+static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
+ [EXT_INTERRUPT] = {.name = "EXT"},
+ [IO_INTERRUPT] = {.name = "I/O"},
+ [THIN_INTERRUPT] = {.name = "AIO"},
};
/*
@@ -86,6 +88,28 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
[CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"},
};
+void __init init_IRQ(void)
+{
+ irq_reserve_irqs(0, THIN_INTERRUPT);
+ init_cio_interrupts();
+ init_airq_interrupts();
+ init_ext_interrupts();
+}
+
+void do_IRQ(struct pt_regs *regs, int irq)
+{
+ struct pt_regs *old_regs;
+
+ old_regs = set_irq_regs(regs);
+ irq_enter();
+ if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+ /* Serve timer interrupts first. */
+ clock_comparator_work();
+ generic_handle_irq(irq);
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
/*
* show_interrupts is needed by /proc/interrupts.
*/
@@ -100,27 +124,36 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(cpu)
seq_printf(p, "CPU%d ", cpu);
seq_putc(p, '\n');
+ goto out;
}
if (irq < NR_IRQS) {
+ if (irq >= NR_IRQS_BASE)
+ goto out;
seq_printf(p, "%s: ", irqclass_main_desc[irq].name);
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
seq_putc(p, '\n');
- goto skip_arch_irqs;
+ goto out;
}
for (irq = 0; irq < NR_ARCH_IRQS; irq++) {
seq_printf(p, "%s: ", irqclass_sub_desc[irq].name);
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]);
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat, cpu).irqs[irq]);
if (irqclass_sub_desc[irq].desc)
seq_printf(p, " %s", irqclass_sub_desc[irq].desc);
seq_putc(p, '\n');
}
-skip_arch_irqs:
+out:
put_online_cpus();
return 0;
}
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+ return 0;
+}
+
/*
* Switch to the asynchronous interrupt stack for softirq execution.
*/
@@ -159,14 +192,6 @@ asmlinkage void do_softirq(void)
local_irq_restore(flags);
}
-#ifdef CONFIG_PROC_FS
-void init_irq_proc(void)
-{
- if (proc_mkdir("irq", NULL))
- create_prof_cpu_mask();
-}
-#endif
-
/*
* ext_int_hash[index] is the list head for all external interrupts that hash
* to this index.
@@ -183,14 +208,6 @@ struct ext_int_info {
/* ext_int_hash_lock protects the handler lists for external interrupts */
DEFINE_SPINLOCK(ext_int_hash_lock);
-static void __init init_external_interrupts(void)
-{
- int idx;
-
- for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
- INIT_LIST_HEAD(&ext_int_hash[idx]);
-}
-
static inline int ext_hash(u16 code)
{
return (code + (code >> 9)) & 0xff;
@@ -234,20 +251,13 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
}
EXPORT_SYMBOL(unregister_external_interrupt);
-void __irq_entry do_extint(struct pt_regs *regs)
+static irqreturn_t do_ext_interrupt(int irq, void *dummy)
{
+ struct pt_regs *regs = get_irq_regs();
struct ext_code ext_code;
- struct pt_regs *old_regs;
struct ext_int_info *p;
int index;
- old_regs = set_irq_regs(regs);
- irq_enter();
- if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) {
- /* Serve timer interrupts first. */
- clock_comparator_work();
- }
- kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL);
ext_code = *(struct ext_code *) &regs->int_code;
if (ext_code.code != 0x1004)
__get_cpu_var(s390_idle).nohz_delay = 1;
@@ -259,13 +269,25 @@ void __irq_entry do_extint(struct pt_regs *regs)
p->handler(ext_code, regs->int_parm,
regs->int_parm_long);
rcu_read_unlock();
- irq_exit();
- set_irq_regs(old_regs);
+
+ return IRQ_HANDLED;
}
-void __init init_IRQ(void)
+static struct irqaction external_interrupt = {
+ .name = "EXT",
+ .handler = do_ext_interrupt,
+};
+
+void __init init_ext_interrupts(void)
{
- init_external_interrupts();
+ int idx;
+
+ for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
+ INIT_LIST_HEAD(&ext_int_hash[idx]);
+
+ irq_set_chip_and_handler(EXT_INTERRUPT,
+ &dummy_irq_chip, handle_percpu_irq);
+ setup_irq(EXT_INTERRUPT, &external_interrupt);
}
static DEFINE_SPINLOCK(sc_irq_lock);
@@ -313,69 +335,3 @@ void measurement_alert_subclass_unregister(void)
spin_unlock(&ma_subclass_lock);
}
EXPORT_SYMBOL(measurement_alert_subclass_unregister);
-
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
- /*
- * Not needed, the handler is protected by a lock and IRQs that occur
- * after the handler is deleted are just NOPs.
- */
-}
-EXPORT_SYMBOL_GPL(synchronize_irq);
-#endif
-
-#ifndef CONFIG_PCI
-
-/* Only PCI devices have dynamically-defined IRQ handlers */
-
-int request_irq(unsigned int irq, irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
-{
- return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(request_irq);
-
-void free_irq(unsigned int irq, void *dev_id)
-{
- WARN_ON(1);
-}
-EXPORT_SYMBOL_GPL(free_irq);
-
-void enable_irq(unsigned int irq)
-{
- WARN_ON(1);
-}
-EXPORT_SYMBOL_GPL(enable_irq);
-
-void disable_irq(unsigned int irq)
-{
- WARN_ON(1);
-}
-EXPORT_SYMBOL_GPL(disable_irq);
-
-#endif /* !CONFIG_PCI */
-
-void disable_irq_nosync(unsigned int irq)
-{
- disable_irq(irq);
-}
-EXPORT_SYMBOL_GPL(disable_irq_nosync);
-
-unsigned long probe_irq_on(void)
-{
- return 0;
-}
-EXPORT_SYMBOL_GPL(probe_irq_on);
-
-int probe_irq_off(unsigned long val)
-{
- return 0;
-}
-EXPORT_SYMBOL_GPL(probe_irq_off);
-
-unsigned int probe_irq_mask(unsigned long val)
-{
- return val;
-}
-EXPORT_SYMBOL_GPL(probe_irq_mask);
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 3388b2b2a07d..adbbe7f1cb0d 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -105,14 +105,31 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
fixup |= FIXUP_RETURN_REGISTER;
break;
case 0xeb:
- if ((insn[2] & 0xff) == 0x44 || /* bxhg */
- (insn[2] & 0xff) == 0x45) /* bxleg */
+ switch (insn[2] & 0xff) {
+ case 0x44: /* bxhg */
+ case 0x45: /* bxleg */
fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ }
break;
case 0xe3: /* bctg */
if ((insn[2] & 0xff) == 0x46)
fixup = FIXUP_BRANCH_NOT_TAKEN;
break;
+ case 0xec:
+ switch (insn[2] & 0xff) {
+ case 0xe5: /* clgrb */
+ case 0xe6: /* cgrb */
+ case 0xf6: /* crb */
+ case 0xf7: /* clrb */
+ case 0xfc: /* cgib */
case 0xfd: /* clgib */
+ case 0xfe: /* cib */
+ case 0xff: /* clib */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ }
+ break;
}
return fixup;
}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 504175ebf8b0..c4c033819879 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -214,10 +214,7 @@ static int notrace s390_revalidate_registers(struct mci *mci)
: "0", "cc");
#endif
/* Revalidate clock comparator register */
- if (S390_lowcore.clock_comparator == -1)
- set_clock_comparator(S390_lowcore.mcck_clock);
- else
- set_clock_comparator(S390_lowcore.clock_comparator);
+ set_clock_comparator(S390_lowcore.clock_comparator);
/* Check if old PSW is valid */
if (!mci->wp)
/*
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 390d9ae57bb2..fb99c2057b85 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -639,8 +639,8 @@ static struct pmu cpumf_pmu = {
.cancel_txn = cpumf_pmu_cancel_txn,
};
-static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
unsigned int cpu = (long) hcpu;
int flags;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index a6fc037671b1..500aa1029bcb 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -52,12 +52,13 @@ static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
static bool is_in_guest(struct pt_regs *regs)
{
- unsigned long ip = instruction_pointer(regs);
-
if (user_mode(regs))
return false;
-
- return ip == (unsigned long) &sie_exit;
+#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+ return instruction_pointer(regs) == (unsigned long) &sie_exit;
+#else
+ return false;
+#endif
}
static unsigned long guest_is_user_mode(struct pt_regs *regs)
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 2bc3eddae34a..c5dbb335716d 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -71,6 +71,7 @@ void arch_cpu_idle(void)
}
/* Halt the cpu and keep track of cpu time accounting. */
vtime_stop_cpu();
+ local_irq_enable();
}
void arch_cpu_idle_exit(void)
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 753c41d0ffd3..24612029f450 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -21,7 +21,7 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id);
/*
* cpu_init - initializes state that is per-CPU.
*/
-void __cpuinit cpu_init(void)
+void cpu_init(void)
{
struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
struct cpuid *id = &__get_cpu_var(cpu_id);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index a314c57f4e94..9556905bd3ce 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -47,7 +47,7 @@ enum s390_regset {
REGSET_GENERAL_EXTENDED,
};
-void update_per_regs(struct task_struct *task)
+void update_cr_regs(struct task_struct *task)
{
struct pt_regs *regs = task_pt_regs(task);
struct thread_struct *thread = &task->thread;
@@ -56,17 +56,25 @@ void update_per_regs(struct task_struct *task)
#ifdef CONFIG_64BIT
/* Take care of the enable/disable of transactional execution. */
if (MACHINE_HAS_TE) {
- unsigned long cr0, cr0_new;
+ unsigned long cr[3], cr_new[3];
- __ctl_store(cr0, 0, 0);
- /* set or clear transaction execution bits 8 and 9. */
+ __ctl_store(cr, 0, 2);
+ cr_new[1] = cr[1];
+ /* Set or clear transaction execution TXC bit 8. */
if (task->thread.per_flags & PER_FLAG_NO_TE)
- cr0_new = cr0 & ~(3UL << 54);
+ cr_new[0] = cr[0] & ~(1UL << 55);
else
- cr0_new = cr0 | (3UL << 54);
- /* Only load control register 0 if necessary. */
- if (cr0 != cr0_new)
- __ctl_load(cr0_new, 0, 0);
+ cr_new[0] = cr[0] | (1UL << 55);
+ /* Set or clear transaction execution TDC bits 62 and 63. */
+ cr_new[2] = cr[2] & ~3UL;
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+ cr_new[2] |= 1UL;
+ else
+ cr_new[2] |= 2UL;
+ }
+ if (memcmp(&cr_new, &cr, sizeof(cr)))
+ __ctl_load(cr_new, 0, 2);
}
#endif
/* Copy user specified PER registers */
@@ -100,14 +108,14 @@ void user_enable_single_step(struct task_struct *task)
{
set_tsk_thread_flag(task, TIF_SINGLE_STEP);
if (task == current)
- update_per_regs(task);
+ update_cr_regs(task);
}
void user_disable_single_step(struct task_struct *task)
{
clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
if (task == current)
- update_per_regs(task);
+ update_cr_regs(task);
}
/*
@@ -447,6 +455,26 @@ long arch_ptrace(struct task_struct *child, long request,
if (!MACHINE_HAS_TE)
return -EIO;
child->thread.per_flags |= PER_FLAG_NO_TE;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ return 0;
+ case PTRACE_TE_ABORT_RAND:
+ if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+ return -EIO;
+ switch (data) {
+ case 0UL:
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ break;
+ case 1UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ case 2UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ default:
+ return -EINVAL;
+ }
return 0;
default:
/* Removing high order bit from addr (only for 31 bit). */
@@ -1271,7 +1299,7 @@ int regs_query_register_offset(const char *name)
if (!name || *name != 'r')
return -EINVAL;
- if (strict_strtoul(name + 1, 10, &offset))
+ if (kstrtoul(name + 1, 10, &offset))
return -EINVAL;
if (offset >= NUM_GPRS)
return -EINVAL;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 497451ec5e26..aeed8a61fa0d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -994,6 +994,7 @@ static void __init setup_hwcaps(void)
strcpy(elf_platform, "z196");
break;
case 0x2827:
+ case 0x2828:
strcpy(elf_platform, "zEC12");
break;
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 15a016c10563..d386c4e9d2e5 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -165,7 +165,7 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
pcpu_sigp_retry(pcpu, order, 0);
}
-static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
+static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
struct _lowcore *lc;
@@ -616,10 +616,9 @@ static struct sclp_cpu_info *smp_get_cpu_info(void)
return info;
}
-static int __cpuinit smp_add_present_cpu(int cpu);
+static int smp_add_present_cpu(int cpu);
-static int __cpuinit __smp_rescan_cpus(struct sclp_cpu_info *info,
- int sysfs_add)
+static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
{
struct pcpu *pcpu;
cpumask_t avail;
@@ -685,7 +684,7 @@ static void __init smp_detect_cpus(void)
/*
* Activate a secondary processor.
*/
-static void __cpuinit smp_start_secondary(void *cpuvoid)
+static void smp_start_secondary(void *cpuvoid)
{
S390_lowcore.last_update_clock = get_tod_clock();
S390_lowcore.restart_stack = (unsigned long) restart_stack;
@@ -708,7 +707,7 @@ static void __cpuinit smp_start_secondary(void *cpuvoid)
}
/* Upping and downing of CPUs */
-int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
struct pcpu *pcpu;
int rc;
@@ -964,8 +963,8 @@ static struct attribute_group cpu_online_attr_group = {
.attrs = cpu_online_attrs,
};
-static int __cpuinit smp_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
unsigned int cpu = (unsigned int)(long)hcpu;
struct cpu *c = &pcpu_devices[cpu].cpu;
@@ -983,7 +982,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self,
return notifier_from_errno(err);
}
-static int __cpuinit smp_add_present_cpu(int cpu)
+static int smp_add_present_cpu(int cpu)
{
struct cpu *c = &pcpu_devices[cpu].cpu;
struct device *s = &c->dev;
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index c479d2f9605b..737bff38e3ee 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -10,6 +10,9 @@
#include <linux/suspend.h>
#include <linux/mm.h>
#include <asm/ctl_reg.h>
+#include <asm/ipl.h>
+#include <asm/cio.h>
+#include <asm/pci.h>
/*
* References to section boundaries
@@ -211,3 +214,11 @@ void restore_processor_state(void)
__ctl_set_bit(0,28);
local_mcck_enable();
}
+
+/* Called at the end of swsusp_arch_resume */
+void s390_early_resume(void)
+{
+ lgr_info_log();
+ channel_subsystem_reinit();
+ zpci_rescan();
+}
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
index c487be4cfc81..6b09fdffbd2f 100644
--- a/arch/s390/kernel/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -281,11 +281,8 @@ restore_registers:
lghi %r2,0
brasl %r14,arch_set_page_states
- /* Log potential guest relocation */
- brasl %r14,lgr_info_log
-
- /* Reinitialize the channel subsystem */
- brasl %r14,channel_subsystem_reinit
+ /* Call arch specific early resume code */
+ brasl %r14,s390_early_resume
/* Return 0 */
lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 62f89d98e880..811f542b8ed4 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -418,7 +418,7 @@ void s390_adjust_jiffies(void)
/*
* calibrate the delay loop
*/
-void __cpuinit calibrate_delay(void)
+void calibrate_delay(void)
{
s390_adjust_jiffies();
/* Print the good old Bogomips line .. */
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 876546b9cfa1..064c3082ab33 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -92,7 +92,6 @@ void clock_comparator_work(void)
struct clock_event_device *cd;
S390_lowcore.clock_comparator = -1ULL;
- set_clock_comparator(S390_lowcore.clock_comparator);
cd = &__get_cpu_var(comparators);
cd->event_handler(cd);
}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index d7776281cb60..05d75c413137 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -63,7 +63,7 @@ static int __init vdso_setup(char *s)
else if (strncmp(s, "off", 4) == 0)
vdso_enabled = 0;
else {
- rc = strict_strtoul(s, 0, &val);
+ rc = kstrtoul(s, 0, &val);
vdso_enabled = rc ? 0 : !!val;
}
return !rc;
@@ -113,11 +113,11 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore)
clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
PAGE_SIZE << SEGMENT_ORDER);
- clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
+ clear_table((unsigned long *) page_table, _PAGE_INVALID,
256*sizeof(unsigned long));
*(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
- *(unsigned long *) page_table = _PAGE_RO + page_frame;
+ *(unsigned long *) page_table = _PAGE_PROTECT + page_frame;
psal = (u32 *) (page_table + 256*sizeof(unsigned long));
aste = psal + 32;
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 3fb09359eda6..abcfab55f99b 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -19,6 +19,7 @@
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/vtimer.h>
+#include <asm/vtime.h>
#include <asm/irq.h>
#include "entry.h"
@@ -371,14 +372,14 @@ EXPORT_SYMBOL(del_virt_timer);
/*
* Start the virtual CPU timer on the current CPU.
*/
-void __cpuinit init_cpu_vtimer(void)
+void init_cpu_vtimer(void)
{
/* set initial cpu timer */
set_vtimer(VTIMER_MAX_SLICE);
}
-static int __cpuinit s390_nohz_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int s390_nohz_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
struct s390_idle_data *idle;
long cpu = (long) hcpu;
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 302e0e52b009..99d789e8a018 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -42,9 +42,11 @@ static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
({ \
__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
int __mask = sizeof(__typeof__(*(gptr))) - 1; \
- int __ret = PTR_RET((void __force *)__uptr); \
+ int __ret; \
\
- if (!__ret) { \
+ if (IS_ERR((void __force *)__uptr)) { \
+ __ret = PTR_ERR((void __force *)__uptr); \
+ } else { \
BUG_ON((unsigned long)__uptr & __mask); \
__ret = get_user(x, __uptr); \
} \
@@ -55,9 +57,11 @@ static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
({ \
__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
int __mask = sizeof(__typeof__(*(gptr))) - 1; \
- int __ret = PTR_RET((void __force *)__uptr); \
+ int __ret; \
\
- if (!__ret) { \
+ if (IS_ERR((void __force *)__uptr)) { \
+ __ret = PTR_ERR((void __force *)__uptr); \
+ } else { \
BUG_ON((unsigned long)__uptr & __mask); \
__ret = put_user(x, __uptr); \
} \
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ac8e6670c551..776dafe918db 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -709,14 +709,25 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return rc;
vcpu->arch.sie_block->icptcode = 0;
- preempt_disable();
- kvm_guest_enter();
- preempt_enable();
VCPU_EVENT(vcpu, 6, "entering sie flags %x",
atomic_read(&vcpu->arch.sie_block->cpuflags));
trace_kvm_s390_sie_enter(vcpu,
atomic_read(&vcpu->arch.sie_block->cpuflags));
+
+ /*
+ * PF_VCPU is used in the fault handler, so there must be no uaccess
+ * between guest_enter and guest_exit.
+ */
+ preempt_disable();
+ kvm_guest_enter();
+ preempt_enable();
rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
+ kvm_guest_exit();
+
+ VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
+ vcpu->arch.sie_block->icptcode);
+ trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
+
if (rc > 0)
rc = 0;
if (rc < 0) {
@@ -728,10 +739,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
}
- VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
- vcpu->arch.sie_block->icptcode);
- trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
- kvm_guest_exit();
memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
return rc;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 8f8d8ee9b1fb..59200ee275e5 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -16,6 +16,7 @@
#include <linux/errno.h>
#include <linux/compat.h>
#include <asm/asm-offsets.h>
+#include <asm/facility.h>
#include <asm/current.h>
#include <asm/debug.h>
#include <asm/ebcdic.h>
@@ -526,8 +527,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* Only provide non-quiescing support if the host supports it */
- if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ &&
- S390_lowcore.stfl_fac_list & 0x00020000)
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* No support for conditional-SSKE */
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index c61b9fad43cc..57c87d7d7ede 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -44,7 +44,6 @@ static void __udelay_disabled(unsigned long long usecs)
do {
set_clock_comparator(end);
vtime_stop_cpu();
- local_irq_disable();
} while (get_tod_clock() < end);
lockdep_on();
__ctl_load(cr0, 0, 0);
@@ -64,7 +63,6 @@ static void __udelay_enabled(unsigned long long usecs)
set_clock_comparator(end);
}
vtime_stop_cpu();
- local_irq_disable();
if (clock_saved)
local_tick_enable(clock_saved);
} while (get_tod_clock() < end);
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 50ea137a2d3c..1694d738b175 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -86,28 +86,28 @@ static unsigned long follow_table(struct mm_struct *mm,
switch (mm->context.asce_bits & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
table = table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
return -0x39UL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_REGION2:
table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
return -0x3aUL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_REGION3:
table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
return -0x3bUL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_SEGMENT:
table = table + ((address >> 20) & 0x7ff);
- if (unlikely(*table & _SEGMENT_ENTRY_INV))
+ if (unlikely(*table & _SEGMENT_ENTRY_INVALID))
return -0x10UL;
if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) {
- if (write && (*table & _SEGMENT_ENTRY_RO))
+ if (write && (*table & _SEGMENT_ENTRY_PROTECT))
return -0x04UL;
return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) +
(address & ~_SEGMENT_ENTRY_ORIGIN_LARGE);
@@ -117,7 +117,7 @@ static unsigned long follow_table(struct mm_struct *mm,
table = table + ((address >> 12) & 0xff);
if (unlikely(*table & _PAGE_INVALID))
return -0x11UL;
- if (write && (*table & _PAGE_RO))
+ if (write && (*table & _PAGE_PROTECT))
return -0x04UL;
return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
}
@@ -130,13 +130,13 @@ static unsigned long follow_table(struct mm_struct *mm,
unsigned long *table = (unsigned long *)__pa(mm->pgd);
table = table + ((address >> 20) & 0x7ff);
- if (unlikely(*table & _SEGMENT_ENTRY_INV))
+ if (unlikely(*table & _SEGMENT_ENTRY_INVALID))
return -0x10UL;
table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
table = table + ((address >> 12) & 0xff);
if (unlikely(*table & _PAGE_INVALID))
return -0x11UL;
- if (write && (*table & _PAGE_RO))
+ if (write && (*table & _PAGE_PROTECT))
return -0x04UL;
return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
}
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 3ad65b04ac15..46d517c3c763 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -53,7 +53,7 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level)
seq_printf(m, "I\n");
return;
}
- seq_printf(m, "%s", pr & _PAGE_RO ? "RO " : "RW ");
+ seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW ");
seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " ");
seq_putc(m, '\n');
}
@@ -105,12 +105,12 @@ static void note_page(struct seq_file *m, struct pg_state *st,
}
/*
- * The actual page table walker functions. In order to keep the implementation
- * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO
- * flags to note_page() if a region, segment or page table entry is invalid or
- * read-only.
- * After all it's just a hint that the current level being walked contains an
- * invalid or read-only entry.
+ * The actual page table walker functions. In order to keep the
+ * implementation of print_prot() short, we only check and pass
+ * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
+ * segment or page table entry is invalid or read-only.
+ * After all it's just a hint that the current level being walked
+ * contains an invalid or read-only entry.
*/
static void walk_pte_level(struct seq_file *m, struct pg_state *st,
pmd_t *pmd, unsigned long addr)
@@ -122,14 +122,14 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st,
for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
st->current_address = addr;
pte = pte_offset_kernel(pmd, addr);
- prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID);
+ prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID);
note_page(m, st, prot, 4);
addr += PAGE_SIZE;
}
}
#ifdef CONFIG_64BIT
-#define _PMD_PROT_MASK (_SEGMENT_ENTRY_RO | _SEGMENT_ENTRY_CO)
+#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO)
#else
#define _PMD_PROT_MASK 0
#endif
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 047c3e4c59a2..f00aefb66a4e 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -639,8 +639,8 @@ out:
put_task_struct(tsk);
}
-static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int pfault_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
struct thread_struct *thread, *next;
struct task_struct *tsk;
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 1f5315d1215c..5d758db27bdc 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -24,7 +24,7 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
pte_t *ptep, pte;
struct page *page;
- mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
+ mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
do {
@@ -55,8 +55,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
struct page *head, *page, *tail;
int refs;
- result = write ? 0 : _SEGMENT_ENTRY_RO;
- mask = result | _SEGMENT_ENTRY_INV;
+ result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
+ mask = result | _SEGMENT_ENTRY_INVALID;
if ((pmd_val(pmd) & mask) != result)
return 0;
VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 121089d57802..248445f92604 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -8,21 +8,127 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
+/*
+ * Translate a pte value into the corresponding pmd (segment table entry)
+ * value. A pte that is not present yields an entry that has only
+ * _SEGMENT_ENTRY_INVALID set.
+ */
+static inline pmd_t __pte_to_pmd(pte_t pte)
+{
+ int none, young, prot;
+ pmd_t pmd;
+
+ /*
+ * Convert encoding pte bits pmd bits
+ * .IR...wrdytp ..R...I...y.
+ * empty .10...000000 -> ..0...1...0.
+ * prot-none, clean, old .11...000001 -> ..0...1...1.
+ * prot-none, clean, young .11...000101 -> ..1...1...1.
+ * prot-none, dirty, old .10...001001 -> ..0...1...1.
+ * prot-none, dirty, young .10...001101 -> ..1...1...1.
+ * read-only, clean, old .11...010001 -> ..1...1...0.
+ * read-only, clean, young .01...010101 -> ..1...0...1.
+ * read-only, dirty, old .11...011001 -> ..1...1...0.
+ * read-only, dirty, young .01...011101 -> ..1...0...1.
+ * read-write, clean, old .11...110001 -> ..0...1...0.
+ * read-write, clean, young .01...110101 -> ..0...0...1.
+ * read-write, dirty, old .10...111001 -> ..0...1...0.
+ * read-write, dirty, young .00...111101 -> ..0...0...1.
+ * Huge ptes are dirty by definition, a clean pte is made dirty
+ * by the conversion.
+ */
+ if (pte_present(pte)) {
+ /* Keep the address bits, then rebuild the state bits one by one. */
+ pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
+ if (pte_val(pte) & _PAGE_INVALID)
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ /* none: present, but neither readable nor writable */
+ none = (pte_val(pte) & _PAGE_PRESENT) &&
+ !(pte_val(pte) & _PAGE_READ) &&
+ !(pte_val(pte) & _PAGE_WRITE);
+ /* prot: hardware protect bit set and not writable */
+ prot = (pte_val(pte) & _PAGE_PROTECT) &&
+ !(pte_val(pte) & _PAGE_WRITE);
+ young = pte_val(pte) & _PAGE_YOUNG;
+ if (none || young)
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ if (prot || (none && young))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ } else
+ pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
+ return pmd;
+}
+
+/*
+ * Translate a pmd (segment table entry) value into a pte value.
+ * A present pmd always yields _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY
+ * plus the pmd bits selected by PAGE_MASK; a pmd that is not present
+ * yields _PAGE_INVALID.
+ */
+static inline pte_t __pmd_to_pte(pmd_t pmd)
+{
+ pte_t pte;
+
+ /*
+ * Convert encoding pmd bits pte bits
+ * ..R...I...y. .IR...wrdytp
+ * empty ..0...1...0. -> .10...000000
+ * prot-none, old ..0...1...1. -> .10...001001
+ * prot-none, young ..1...1...1. -> .10...001101
+ * read-only, old ..1...1...0. -> .11...011001
+ * read-only, young ..1...0...1. -> .01...011101
+ * read-write, old ..0...1...0. -> .10...111001
+ * read-write, young ..0...0...1. -> .00...111101
+ * Huge ptes are dirty by definition
+ */
+ if (pmd_present(pmd)) {
+ pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY |
+ (pmd_val(pmd) & PAGE_MASK);
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)
+ pte_val(pte) |= _PAGE_INVALID;
+ if (pmd_prot_none(pmd)) {
+ /* For prot-none the protect bit carries "young" (see table). */
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
+ pte_val(pte) |= _PAGE_YOUNG;
+ } else {
+ /* Readable; protect bit selects read-only vs. read-write. */
+ pte_val(pte) |= _PAGE_READ;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
+ pte_val(pte) |= _PAGE_PROTECT;
+ else
+ pte_val(pte) |= _PAGE_WRITE;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)
+ pte_val(pte) |= _PAGE_YOUNG;
+ }
+ } else
+ pte_val(pte) = _PAGE_INVALID;
+ return pte;
+}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *pteptr, pte_t pteval)
+ pte_t *ptep, pte_t pte)
{
- pmd_t *pmdp = (pmd_t *) pteptr;
- unsigned long mask;
+ pmd_t pmd;
+ pmd = __pte_to_pmd(pte);
if (!MACHINE_HAS_HPAGE) {
- pteptr = (pte_t *) pte_page(pteval)[1].index;
- mask = pte_val(pteval) &
- (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
- pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) |= pte_page(pte)[1].index;
+ } else
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO;
+ *(pmd_t *) ptep = pmd;
+}
+
+/*
+ * Read the pmd stored behind a huge pte pointer and return it converted
+ * to pte format by __pmd_to_pte().
+ */
+pte_t huge_ptep_get(pte_t *ptep)
+{
+ unsigned long origin;
+ pmd_t pmd;
+
+ pmd = *(pmd_t *) ptep;
+ if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) {
+ /*
+ * Without MACHINE_HAS_HPAGE the origin field is replaced by
+ * the word read from the location it points to before the
+ * conversion. NOTE(review): presumably the first pte of the
+ * page table installed by set_huge_pte_at() - confirm.
+ */
+ origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) |= *(unsigned long *) origin;
+ }
+ return __pmd_to_pte(pmd);
+}
- pmd_val(*pmdp) = pte_val(pteval);
+/*
+ * Return the current huge pte at @ptep and reset the underlying pmd to
+ * _SEGMENT_ENTRY_EMPTY. Before the store, __pmd_idte() is called when
+ * MACHINE_HAS_IDTE is set, __pmd_csp() otherwise. @mm is not used here.
+ */
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pmd_t *pmdp = (pmd_t *) ptep;
+ /* Snapshot the old value first; it is what the caller gets back. */
+ pte_t pte = huge_ptep_get(ptep);
+
+ if (MACHINE_HAS_IDTE)
+ __pmd_idte(addr, pmdp);
+ else
+ __pmd_csp(pmdp);
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+ return pte;
}
int arch_prepare_hugepage(struct page *page)
@@ -58,7 +164,7 @@ void arch_release_hugepage(struct page *page)
ptep = (pte_t *) page[1].index;
if (!ptep)
return;
- clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY,
+ clear_table((unsigned long *) ptep, _PAGE_INVALID,
PTRS_PER_PTE * sizeof(pte_t));
page_table_free(&init_mm, (unsigned long *) ptep);
page[1].index = 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ce36ea80e4f9..ad446b0c55b6 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -69,6 +69,7 @@ static void __init setup_zero_pages(void)
order = 2;
break;
case 0x2827: /* zEC12 */
+ case 0x2828: /* zEC12 */
default:
order = 5;
break;
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 80adfbf75065..990397420e6b 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -118,7 +118,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
pte = pte_offset_kernel(pmd, address);
if (!enable) {
__ptep_ipte(address, pte);
- pte_val(*pte) = _PAGE_TYPE_EMPTY;
+ pte_val(*pte) = _PAGE_INVALID;
continue;
}
pte_val(*pte) = __pa(address);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 967d0bf1c059..bf7c0dc64a76 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -161,7 +161,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
struct gmap_rmap *rmap;
struct page *page;
- if (*table & _SEGMENT_ENTRY_INV)
+ if (*table & _SEGMENT_ENTRY_INVALID)
return 0;
page = pfn_to_page(*table >> PAGE_SHIFT);
mp = (struct gmap_pgtable *) page->index;
@@ -172,7 +172,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
kfree(rmap);
break;
}
- *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+ *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
return 1;
}
@@ -258,7 +258,7 @@ static int gmap_alloc_table(struct gmap *gmap,
return -ENOMEM;
new = (unsigned long *) page_to_phys(page);
crst_table_init(new, init);
- if (*table & _REGION_ENTRY_INV) {
+ if (*table & _REGION_ENTRY_INVALID) {
list_add(&page->lru, &gmap->crst_list);
*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
@@ -292,22 +292,22 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
for (off = 0; off < len; off += PMD_SIZE) {
/* Walk the guest addr space page table */
table = gmap->table + (((to + off) >> 53) & 0x7ff);
- if (*table & _REGION_ENTRY_INV)
+ if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 42) & 0x7ff);
- if (*table & _REGION_ENTRY_INV)
+ if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 31) & 0x7ff);
- if (*table & _REGION_ENTRY_INV)
+ if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 20) & 0x7ff);
/* Clear segment table entry in guest address space. */
flush |= gmap_unlink_segment(gmap, table);
- *table = _SEGMENT_ENTRY_INV;
+ *table = _SEGMENT_ENTRY_INVALID;
}
out:
spin_unlock(&gmap->mm->page_table_lock);
@@ -345,17 +345,17 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
for (off = 0; off < len; off += PMD_SIZE) {
/* Walk the gmap address space page table */
table = gmap->table + (((to + off) >> 53) & 0x7ff);
- if ((*table & _REGION_ENTRY_INV) &&
+ if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
goto out_unmap;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 42) & 0x7ff);
- if ((*table & _REGION_ENTRY_INV) &&
+ if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
goto out_unmap;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 31) & 0x7ff);
- if ((*table & _REGION_ENTRY_INV) &&
+ if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
goto out_unmap;
table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
@@ -363,7 +363,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
/* Store 'from' address in an invalid segment table entry. */
flush |= gmap_unlink_segment(gmap, table);
- *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
+ *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
+ _SEGMENT_ENTRY_PROTECT);
}
spin_unlock(&gmap->mm->page_table_lock);
up_read(&gmap->mm->mmap_sem);
@@ -384,15 +385,15 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
unsigned long *table;
table = gmap->table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
return ERR_PTR(-EFAULT);
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
return ERR_PTR(-EFAULT);
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
return ERR_PTR(-EFAULT);
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 20) & 0x7ff);
@@ -422,11 +423,11 @@ unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
return PTR_ERR(segment_ptr);
/* Convert the gmap address to an mm address. */
segment = *segment_ptr;
- if (!(segment & _SEGMENT_ENTRY_INV)) {
+ if (!(segment & _SEGMENT_ENTRY_INVALID)) {
page = pfn_to_page(segment >> PAGE_SHIFT);
mp = (struct gmap_pgtable *) page->index;
return mp->vmaddr | (address & ~PMD_MASK);
- } else if (segment & _SEGMENT_ENTRY_RO) {
+ } else if (segment & _SEGMENT_ENTRY_PROTECT) {
vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
return vmaddr | (address & ~PMD_MASK);
}
@@ -517,8 +518,8 @@ static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
mp = (struct gmap_pgtable *) page->index;
list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
- *rmap->entry =
- _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+ *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
+ _SEGMENT_ENTRY_PROTECT);
list_del(&rmap->list);
kfree(rmap);
flush = 1;
@@ -545,13 +546,13 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
/* Convert the gmap address to an mm address. */
while (1) {
segment = *segment_ptr;
- if (!(segment & _SEGMENT_ENTRY_INV)) {
+ if (!(segment & _SEGMENT_ENTRY_INVALID)) {
/* Page table is present */
page = pfn_to_page(segment >> PAGE_SHIFT);
mp = (struct gmap_pgtable *) page->index;
return mp->vmaddr | (address & ~PMD_MASK);
}
- if (!(segment & _SEGMENT_ENTRY_RO))
+ if (!(segment & _SEGMENT_ENTRY_PROTECT))
/* Nothing mapped in the gmap address space. */
break;
rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
@@ -586,25 +587,25 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
while (address < to) {
/* Walk the gmap address space page table */
table = gmap->table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV)) {
+ if (unlikely(*table & _REGION_ENTRY_INVALID)) {
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV)) {
+ if (unlikely(*table & _REGION_ENTRY_INVALID)) {
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV)) {
+ if (unlikely(*table & _REGION_ENTRY_INVALID)) {
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 20) & 0x7ff);
- if (unlikely(*table & _SEGMENT_ENTRY_INV)) {
+ if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
@@ -687,7 +688,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
continue;
/* Set notification bit in the pgste of the pte */
entry = *ptep;
- if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) {
+ if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
pgste = pgste_get_lock(ptep);
pgste_val(pgste) |= PGSTE_IN_BIT;
pgste_set_unlock(ptep, pgste);
@@ -757,8 +758,9 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
page->index = (unsigned long) mp;
atomic_set(&page->_mapcount, 0);
table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
- clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+ clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
+ PAGE_SIZE/2);
return table;
}
@@ -796,26 +798,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
- unsigned long address, bits;
- unsigned char skey;
+ unsigned long address, bits, skey;
address = pte_val(*ptep) & PAGE_MASK;
- skey = page_get_storage_key(address);
+ skey = (unsigned long) page_get_storage_key(address);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+ skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
/* Set storage key ACC and FP */
- page_set_storage_key(address,
- (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)),
- !nq);
-
+ page_set_storage_key(address, skey, !nq);
/* Merge host changed & referenced into pgste */
pgste_val(new) |= bits << 52;
- /* Transfer skey changed & referenced bit to kvm user bits */
- pgste_val(new) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */
}
/* changing the guest storage key is considered a change of the page */
if ((pgste_val(new) ^ pgste_val(old)) &
(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
- pgste_val(new) |= PGSTE_UC_BIT;
+ pgste_val(new) |= PGSTE_HC_BIT;
pgste_set_unlock(ptep, new);
pte_unmap_unlock(*ptep, ptl);
@@ -888,7 +885,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
pgtable_page_ctor(page);
atomic_set(&page->_mapcount, 1);
table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE);
spin_lock_bh(&mm->context.list_lock);
list_add(&page->lru, &mm->context.pgtable_list);
} else {
@@ -1017,7 +1014,6 @@ void tlb_table_flush(struct mmu_gather *tlb)
struct mmu_table_batch **batch = &tlb->batch;
if (*batch) {
- __tlb_flush_mm(tlb->mm);
call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
*batch = NULL;
}
@@ -1027,11 +1023,12 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
struct mmu_table_batch **batch = &tlb->batch;
+ tlb->mm->context.flush_mm = 1;
if (*batch == NULL) {
*batch = (struct mmu_table_batch *)
__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
if (*batch == NULL) {
- __tlb_flush_mm(tlb->mm);
+ __tlb_flush_mm_lazy(tlb->mm);
tlb_remove_table_one(table);
return;
}
@@ -1039,7 +1036,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
}
(*batch)->tables[(*batch)->nr++] = table;
if ((*batch)->nr == MAX_TABLE_BATCH)
- tlb_table_flush(tlb);
+ tlb_flush_mmu(tlb);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -1171,9 +1168,9 @@ int s390_enable_sie(void)
thp_split_mm(mm);
/* Reallocate the page tables with pgstes */
mm->context.has_pgste = 1;
- tlb_gather_mmu(&tlb, mm, 0);
+ tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
page_table_realloc(&tlb, mm, 0, TASK_SIZE);
- tlb_finish_mmu(&tlb, 0, -1);
+ tlb_finish_mmu(&tlb, 0, TASK_SIZE);
up_write(&mm->mmap_sem);
return mm->context.has_pgste ? 0 : -ENOMEM;
}
@@ -1251,9 +1248,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
list_del(lh);
}
ptep = (pte_t *) pgtable;
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ pte_val(*ptep) = _PAGE_INVALID;
ptep++;
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ pte_val(*ptep) = _PAGE_INVALID;
return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 8b268fcc4612..bcfb70b60be6 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -69,7 +69,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
if (!pte)
return NULL;
- clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
+ clear_table((unsigned long *) pte, _PAGE_INVALID,
PTRS_PER_PTE * sizeof(pte_t));
return pte;
}
@@ -101,7 +101,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
!(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
pud_val(*pu_dir) = __pa(address) |
_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
- (ro ? _REGION_ENTRY_RO : 0);
+ (ro ? _REGION_ENTRY_PROTECT : 0);
address += PUD_SIZE;
continue;
}
@@ -118,7 +118,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
!(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
pmd_val(*pm_dir) = __pa(address) |
_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
- (ro ? _SEGMENT_ENTRY_RO : 0);
+ _SEGMENT_ENTRY_YOUNG |
+ (ro ? _SEGMENT_ENTRY_PROTECT : 0);
address += PMD_SIZE;
continue;
}
@@ -131,7 +132,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
}
pt_dir = pte_offset_kernel(pm_dir, address);
- pte_val(*pt_dir) = __pa(address) | (ro ? _PAGE_RO : 0);
+ pte_val(*pt_dir) = __pa(address) |
+ pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
address += PAGE_SIZE;
}
ret = 0;
@@ -154,7 +156,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
pte_t *pt_dir;
pte_t pte;
- pte_val(pte) = _PAGE_TYPE_EMPTY;
+ pte_val(pte) = _PAGE_INVALID;
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
@@ -255,7 +257,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
new_page =__pa(vmem_alloc_pages(0));
if (!new_page)
goto out;
- pte_val(*pt_dir) = __pa(new_page);
+ pte_val(*pt_dir) =
+ __pa(new_page) | pgprot_val(PAGE_KERNEL);
}
address += PAGE_SIZE;
}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 82f165f8078c..d5f10a43a58f 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -9,6 +9,8 @@
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/filter.h>
+#include <linux/random.h>
+#include <linux/init.h>
#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/facility.h>
@@ -221,6 +223,37 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
EMIT2(0x07fe);
}
+/* Helper to find the byte offset of pkt_type in sk_buff.
+ * Make sure it's still a 3bit field starting at the MSBs within a byte.
+ */
+#define PKT_TYPE_MAX 0xe0
+static int pkt_type_offset;
+
+static int __init bpf_pkt_type_offset_init(void)
+{
+ /* Probe object in which only the pkt_type bits are set. */
+ struct sk_buff skb_probe = {
+ .pkt_type = ~0,
+ };
+ char *ct = (char *)&skb_probe;
+ int off;
+
+ /*
+ * Scan every byte of the probe: the single non-zero byte must equal
+ * PKT_TYPE_MAX and its index becomes pkt_type_offset. Any other
+ * non-zero byte warns once, resets pkt_type_offset to -1 and
+ * returns -1. Returns 0 otherwise.
+ */
+ pkt_type_offset = -1;
+ for (off = 0; off < sizeof(struct sk_buff); off++) {
+ if (!ct[off])
+ continue;
+ if (ct[off] == PKT_TYPE_MAX)
+ pkt_type_offset = off;
+ else {
+ /* Found non matching bit pattern, fix needed. */
+ WARN_ON_ONCE(1);
+ pkt_type_offset = -1;
+ return -1;
+ }
+ }
+ return 0;
+}
+device_initcall(bpf_pkt_type_offset_init);
+
/*
* make sure we dont leak kernel information to user
*/
@@ -720,6 +753,16 @@ call_fn: /* lg %r1,<d(function)>(%r13) */
EMIT4_DISP(0x88500000, 12);
}
break;
+ case BPF_S_ANC_PKTTYPE:
+ if (pkt_type_offset < 0)
+ goto out;
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ /* ic %r5,<d(pkt_type_offset)>(%r2) */
+ EMIT4_DISP(0x43502000, pkt_type_offset);
+ /* srl %r5,5 */
+ EMIT4_DISP(0x88500000, 5);
+ break;
case BPF_S_ANC_CPU: /* A = smp_processor_id() */
#ifdef CONFIG_SMP
/* l %r5,<d(cpu_nr)> */
@@ -738,8 +781,41 @@ out:
return -1;
}
+/*
+ * Header placed at the start of each JIT image allocation.
+ * Note: for security reasons, the bpf code follows a randomly
+ * sized number of illegal instructions.
+ */
+struct bpf_binary_header {
+ /* size of the allocation in pages, set by bpf_alloc_binary() */
+ unsigned int pages;
+ /* random-sized hole followed by the generated code */
+ u8 image[];
+};
+
+/*
+ * Allocate the zero-filled memory area for a JITed filter.
+ *
+ * @bpfsize:   number of bytes of generated code plus literals
+ * @image_ptr: set to the even address inside the allocation at which
+ *             the caller has to place the code
+ *
+ * Returns the header at the start of the allocation, or NULL if
+ * module_alloc() fails (in which case *image_ptr is left untouched).
+ */
+static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize,
+						  u8 **image_ptr)
+{
+	struct bpf_binary_header *header;
+	unsigned int sz, hole;
+
+	/* Most BPF filters are really small, but if some of them fill a page,
+	 * allow at least 128 extra bytes for illegal instructions.
+	 */
+	sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE);
+	header = module_alloc(sz);
+	if (!header)
+		return NULL;
+	memset(header, 0, sz);
+	header->pages = sz / PAGE_SIZE;
+	/* The hole is what remains after header and code are accounted for,
+	 * so that the code always ends inside the allocation. It is further
+	 * limited to the first page because bpf_jit_free() finds the header
+	 * by masking the image address with PAGE_MASK.
+	 */
+	hole = min(sz - (bpfsize + sizeof(*header)),
+		   PAGE_SIZE - sizeof(*header));
+	/* Insert random number of illegal instructions before BPF code
+	 * and make sure the first instruction starts at an even address.
+	 */
+	*image_ptr = &header->image[(prandom_u32() % hole) & -2];
+	return header;
+}
+
void bpf_jit_compile(struct sk_filter *fp)
{
+ struct bpf_binary_header *header = NULL;
unsigned long size, prg_len, lit_len;
struct bpf_jit jit, cjit;
unsigned int *addrs;
@@ -772,12 +848,11 @@ void bpf_jit_compile(struct sk_filter *fp)
} else if (jit.prg == cjit.prg && jit.lit == cjit.lit) {
prg_len = jit.prg - jit.start;
lit_len = jit.lit - jit.mid;
- size = max_t(unsigned long, prg_len + lit_len,
- sizeof(struct work_struct));
+ size = prg_len + lit_len;
if (size >= BPF_SIZE_MAX)
goto out;
- jit.start = module_alloc(size);
- if (!jit.start)
+ header = bpf_alloc_binary(size, &jit.start);
+ if (!header)
goto out;
jit.prg = jit.mid = jit.start + prg_len;
jit.lit = jit.end = jit.start + prg_len + lit_len;
@@ -788,37 +863,25 @@ void bpf_jit_compile(struct sk_filter *fp)
cjit = jit;
}
if (bpf_jit_enable > 1) {
- pr_err("flen=%d proglen=%lu pass=%d image=%p\n",
- fp->len, jit.end - jit.start, pass, jit.start);
- if (jit.start) {
- printk(KERN_ERR "JIT code:\n");
+ bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start);
+ if (jit.start)
print_fn_code(jit.start, jit.mid - jit.start);
- print_hex_dump(KERN_ERR, "JIT literals:\n",
- DUMP_PREFIX_ADDRESS, 16, 1,
- jit.mid, jit.end - jit.mid, false);
- }
}
- if (jit.start)
+ if (jit.start) {
+ set_memory_ro((unsigned long)header, header->pages);
fp->bpf_func = (void *) jit.start;
+ }
out:
kfree(addrs);
}
-static void jit_free_defer(struct work_struct *arg)
-{
- module_free(NULL, arg);
-}
-
-/* run from softirq, we must use a work_struct to call
- * module_free() from process context
- */
void bpf_jit_free(struct sk_filter *fp)
{
- struct work_struct *work;
+ unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
+ struct bpf_binary_header *header = (void *)addr;
if (fp->bpf_func == sk_run_filter)
return;
- work = (struct work_struct *)fp->bpf_func;
- INIT_WORK(work, jit_free_defer);
- schedule_work(work);
+ set_memory_rw(addr, header->pages);
+ module_free(NULL, header);
}
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index ffeb17ce7f31..930783d2c99b 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -440,7 +440,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
switch (id.machine) {
case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
- case 0x2827: ops->cpu_type = "s390/zEC12"; break;
+ case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
default: return -ENODEV;
}
}
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 086a2e37935d..a9e1dc4ae442 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -2,5 +2,5 @@
# Makefile for the s390 PCI subsystem.
#
-obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_msi.o pci_sysfs.o \
+obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \
pci_event.o pci_debug.o pci_insn.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index e2956ad39a4f..f17a8343e360 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -42,45 +42,26 @@
#define SIC_IRQ_MODE_SINGLE 1
#define ZPCI_NR_DMA_SPACES 1
-#define ZPCI_MSI_VEC_BITS 6
#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
/* list of all detected zpci devices */
-LIST_HEAD(zpci_list);
-EXPORT_SYMBOL_GPL(zpci_list);
-DEFINE_MUTEX(zpci_list_lock);
-EXPORT_SYMBOL_GPL(zpci_list_lock);
+static LIST_HEAD(zpci_list);
+static DEFINE_SPINLOCK(zpci_list_lock);
-static struct pci_hp_callback_ops *hotplug_ops;
+static void zpci_enable_irq(struct irq_data *data);
+static void zpci_disable_irq(struct irq_data *data);
-static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
-static DEFINE_SPINLOCK(zpci_domain_lock);
-
-struct callback {
- irq_handler_t handler;
- void *data;
+static struct irq_chip zpci_irq_chip = {
+ .name = "zPCI",
+ .irq_unmask = zpci_enable_irq,
+ .irq_mask = zpci_disable_irq,
};
-struct zdev_irq_map {
- unsigned long aibv; /* AI bit vector */
- int msi_vecs; /* consecutive MSI-vectors used */
- int __unused;
- struct callback cb[ZPCI_NR_MSI_VECS]; /* callback handler array */
- spinlock_t lock; /* protect callbacks against de-reg */
-};
-
-struct intr_bucket {
- /* amap of adapters, one bit per dev, corresponds to one irq nr */
- unsigned long *alloc;
- /* AI summary bit, global page for all devices */
- unsigned long *aisb;
- /* pointer to aibv and callback data in zdev */
- struct zdev_irq_map *imap[ZPCI_NR_DEVICES];
- /* protects the whole bucket struct */
- spinlock_t lock;
-};
+static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
+static DEFINE_SPINLOCK(zpci_domain_lock);
-static struct intr_bucket *bucket;
+static struct airq_iv *zpci_aisb_iv;
+static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
/* Adapter interrupt definitions */
static void zpci_irq_handler(struct airq_struct *airq);
@@ -96,27 +77,8 @@ static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
struct zpci_iomap_entry *zpci_iomap_start;
EXPORT_SYMBOL_GPL(zpci_iomap_start);
-/* highest irq summary bit */
-static int __read_mostly aisb_max;
-
-static struct kmem_cache *zdev_irq_cache;
static struct kmem_cache *zdev_fmb_cache;
-static inline int irq_to_msi_nr(unsigned int irq)
-{
- return irq & ZPCI_MSI_MASK;
-}
-
-static inline int irq_to_dev_nr(unsigned int irq)
-{
- return irq >> ZPCI_MSI_VEC_BITS;
-}
-
-static inline struct zdev_irq_map *get_imap(unsigned int irq)
-{
- return bucket->imap[irq_to_dev_nr(irq)];
-}
-
struct zpci_dev *get_zdev(struct pci_dev *pdev)
{
return (struct zpci_dev *) pdev->sysdata;
@@ -126,22 +88,17 @@ struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
- mutex_lock(&zpci_list_lock);
+ spin_lock(&zpci_list_lock);
list_for_each_entry(tmp, &zpci_list, entry) {
if (tmp->fid == fid) {
zdev = tmp;
break;
}
}
- mutex_unlock(&zpci_list_lock);
+ spin_unlock(&zpci_list_lock);
return zdev;
}
-bool zpci_fid_present(u32 fid)
-{
- return (get_zdev_by_fid(fid) != NULL) ? true : false;
-}
-
static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
{
return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
@@ -160,8 +117,7 @@ int pci_proc_domain(struct pci_bus *bus)
EXPORT_SYMBOL_GPL(pci_proc_domain);
/* Modify PCI: Register adapter interruptions */
-static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb,
- u64 aibv)
+static int zpci_set_airq(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
struct zpci_fib *fib;
@@ -172,14 +128,14 @@ static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb,
return -ENOMEM;
fib->isc = PCI_ISC;
- fib->noi = zdev->irq_map->msi_vecs;
fib->sum = 1; /* enable summary notifications */
- fib->aibv = aibv;
- fib->aibvo = 0; /* every function has its own page */
- fib->aisb = (u64) bucket->aisb + aisb / 8;
- fib->aisbo = aisb & ZPCI_MSI_MASK;
+ fib->noi = airq_iv_end(zdev->aibv);
+ fib->aibv = (unsigned long) zdev->aibv->vector;
+ fib->aibvo = 0; /* each zdev has its own interrupt vector */
+ fib->aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
+ fib->aisbo = zdev->aisb & 63;
- rc = s390pci_mod_fc(req, fib);
+ rc = zpci_mod_fc(req, fib);
pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi);
free_page((unsigned long) fib);
@@ -209,7 +165,7 @@ static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args
fib->iota = args->iota;
fib->fmb_addr = args->fmb_addr;
- rc = s390pci_mod_fc(req, fib);
+ rc = zpci_mod_fc(req, fib);
free_page((unsigned long) fib);
return rc;
}
@@ -234,7 +190,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
}
/* Modify PCI: Unregister adapter interruptions */
-static int zpci_unregister_airq(struct zpci_dev *zdev)
+static int zpci_clear_airq(struct zpci_dev *zdev)
{
struct mod_pci_args args = { 0, 0, 0, 0 };
@@ -283,7 +239,7 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
u64 data;
int rc;
- rc = s390pci_load(&data, req, offset);
+ rc = zpci_load(&data, req, offset);
if (!rc) {
data = data << ((8 - len) * 8);
data = le64_to_cpu(data);
@@ -301,25 +257,46 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
data = cpu_to_le64(data);
data = data >> ((8 - len) * 8);
- rc = s390pci_store(data, req, offset);
+ rc = zpci_store(data, req, offset);
return rc;
}
-void enable_irq(unsigned int irq)
+static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag)	/* write flag to the mask bit(s) of one MSI/MSI-X vector */
+{
+ int offset, pos;
+ u32 mask_bits;
+
+ if (msi->msi_attrib.is_msix) {
+ offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+ PCI_MSIX_ENTRY_VECTOR_CTRL;
+ msi->masked = readl(msi->mask_base + offset);	/* remember the previous vector control word */
+ writel(flag, msi->mask_base + offset);	/* MSI-X: flag is written as the whole vector control word */
+ } else if (msi->msi_attrib.maskbit) {
+ pos = (long) msi->mask_base;	/* MSI: mask_base is used as a config space offset here */
+ pci_read_config_dword(msi->dev, pos, &mask_bits);
+ mask_bits &= ~(mask);
+ mask_bits |= flag & mask;	/* only the bits selected by mask are changed */
+ pci_write_config_dword(msi->dev, pos, mask_bits);
+ } else
+ return 0;	/* neither MSI-X nor MSI with mask bits: nothing was written */
+
+ msi->msi_attrib.maskbit = !!flag;	/* NOTE(review): overwrites the flag tested in the MSI branch above with the mask state - confirm intended */
+ return 1;	/* mask bits were written */
+}
+
+static void zpci_enable_irq(struct irq_data *data)	/* .irq_unmask handler of zpci_irq_chip: calls the mask helper with flag 0 */
{
- struct msi_desc *msi = irq_get_msi_desc(irq);
+ struct msi_desc *msi = irq_get_msi_desc(data->irq);	/* the irq number now comes from the irq_data argument */
zpci_msi_set_mask_bits(msi, 1, 0);
}
-EXPORT_SYMBOL_GPL(enable_irq);
-void disable_irq(unsigned int irq)
+static void zpci_disable_irq(struct irq_data *data)	/* .irq_mask handler of zpci_irq_chip: calls the mask helper with flag 1 */
{
- struct msi_desc *msi = irq_get_msi_desc(irq);
+ struct msi_desc *msi = irq_get_msi_desc(data->irq);	/* the irq number now comes from the irq_data argument */
zpci_msi_set_mask_bits(msi, 1, 1);
}
-EXPORT_SYMBOL_GPL(disable_irq);
void pcibios_fixup_bus(struct pci_bus *bus)
{
@@ -404,152 +381,147 @@ static struct pci_ops pci_root_ops = {
.write = pci_write,
};
-/* store the last handled bit to implement fair scheduling of devices */
-static DEFINE_PER_CPU(unsigned long, next_sbit);
-
static void zpci_irq_handler(struct airq_struct *airq)
{
- unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit);
- int rescan = 0, max = aisb_max;
- struct zdev_irq_map *imap;
+ unsigned long si, ai;	/* si: bit index in the summary vector, ai: bit index in one device's vector */
+ struct airq_iv *aibv;
+ int irqs_on = 0;	/* becomes non-zero once interrupts have been re-enabled below */
inc_irq_stat(IRQIO_PCI);
- sbit = start;
-
-scan:
- /* find summary_bit */
- for_each_set_bit_left_cont(sbit, bucket->aisb, max) {
- clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6));
- last = sbit;
+ for (si = 0;;) {
+ /* Scan adapter summary indicator bit vector */
+ si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv));
+ if (si == -1UL) {	/* the scan reported no further summary bit */
+ if (irqs_on++)
+ /* End of second scan with interrupts on. */
+ break;
+ /* First scan complete, reenable interrupts. */
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+ si = 0;	/* restart from bit 0 for the second pass */
+ continue;
+ }
- /* find vector bit */
- imap = bucket->imap[sbit];
- for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) {
+ /* Scan the adapter interrupt vector for this device. */
+ aibv = zpci_aibv[si];	/* shortcut pointer stored by arch_setup_msi_irqs() */
+ for (ai = 0;;) {
+ ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
+ if (ai == -1UL)
+ break;
inc_irq_stat(IRQIO_MSI);
- clear_bit(63 - mbit, &imap->aibv);
-
- spin_lock(&imap->lock);
- if (imap->cb[mbit].handler)
- imap->cb[mbit].handler(mbit,
- imap->cb[mbit].data);
- spin_unlock(&imap->lock);
+ airq_iv_lock(aibv, ai);	/* per-bit lock; the vector is created with AIRQ_IV_BITLOCK */
+ generic_handle_irq(airq_iv_get_data(aibv, ai));	/* data stored for this bit is the irq number set in arch_setup_msi_irqs() */
+ airq_iv_unlock(aibv, ai);
}
}
-
- if (rescan)
- goto out;
-
- /* scan the skipped bits */
- if (start > 0) {
- sbit = 0;
- max = start;
- start = 0;
- goto scan;
- }
-
- /* enable interrupts again */
- set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
-
- /* check again to not lose initiative */
- rmb();
- max = aisb_max;
- sbit = find_first_bit_left(bucket->aisb, max);
- if (sbit != max) {
- rescan++;
- goto scan;
- }
-out:
- /* store next device bit to scan */
- __get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last;
}
-/* msi_vecs - number of requested interrupts, 0 place function to error state */
-static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs)
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)	/* set up MSI/MSI-X vectors for pdev; setup is now done inline here */
{
struct zpci_dev *zdev = get_zdev(pdev);
- unsigned int aisb, msi_nr;
+ unsigned int hwirq, irq, msi_vecs;	/* hwirq: bit index in the device's vector, irq: number from irq_alloc_desc() */
+ unsigned long aisb;
struct msi_desc *msi;
+ struct msi_msg msg;
int rc;
- /* store the number of used MSI vectors */
- zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS);
-
- spin_lock(&bucket->lock);
- aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE);
- /* alloc map exhausted? */
- if (aisb == PAGE_SIZE) {
- spin_unlock(&bucket->lock);
- return -EIO;
- }
- set_bit(aisb, bucket->alloc);
- spin_unlock(&bucket->lock);
+ pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
+ if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
+ return -EINVAL;	/* only the MSI and MSI-X capability types are accepted */
+ msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
+ msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);	/* request is clamped to both limits */
+ /* Allocate adapter summary indicator bit */
+ rc = -EIO;
+ aisb = airq_iv_alloc_bit(zpci_aisb_iv);
+ if (aisb == -1UL)
+ goto out;
zdev->aisb = aisb;
- if (aisb + 1 > aisb_max)
- aisb_max = aisb + 1;
- /* wire up IRQ shortcut pointer */
- bucket->imap[zdev->aisb] = zdev->irq_map;
- pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map);
+ /* Create adapter interrupt vector */
+ rc = -ENOMEM;
+ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+ if (!zdev->aibv)
+ goto out_si;
- /* TODO: irq number 0 wont be found if we return less than requested MSIs.
- * ignore it for now and fix in common code.
- */
- msi_nr = aisb << ZPCI_MSI_VEC_BITS;
+ /* Wire up shortcut pointer */
+ zpci_aibv[aisb] = zdev->aibv;
+ /* Request MSI interrupts */
+ hwirq = 0;
list_for_each_entry(msi, &pdev->msi_list, list) {
- rc = zpci_setup_msi_irq(zdev, msi, msi_nr,
- aisb << ZPCI_MSI_VEC_BITS);
+ rc = -EIO;
+ irq = irq_alloc_desc(0); /* Alloc irq on node 0 */
+ if (irq == NO_IRQ)	/* NOTE(review): irq is unsigned; confirm NO_IRQ matches every irq_alloc_desc() failure value */
+ goto out_msi;
+ rc = irq_set_msi_desc(irq, msi);
if (rc)
- return rc;
- msi_nr++;
+ goto out_msi;	/* NOTE(review): hwirq not yet advanced for this entry; confirm out_msi releases the irq just allocated */
+ irq_set_chip_and_handler(irq, &zpci_irq_chip,
+ handle_simple_irq);
+ msg.data = hwirq;	/* MSI payload is the bit index within this device's vector */
+ msg.address_lo = zdev->msi_addr & 0xffffffff;
+ msg.address_hi = zdev->msi_addr >> 32;
+ write_msi_msg(irq, &msg);
+ airq_iv_set_data(zdev->aibv, hwirq, irq);	/* bit -> irq mapping read back by zpci_irq_handler() */
+ hwirq++;
}
- rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv);
- if (rc) {
- clear_bit(aisb, bucket->alloc);
- dev_err(&pdev->dev, "register MSI failed with: %d\n", rc);
- return rc;
+ /* Enable adapter interrupts */
+ rc = zpci_set_airq(zdev);
+ if (rc)
+ goto out_msi;
+
+ return (msi_vecs == nvec) ? 0 : msi_vecs;	/* 0 if the clamped count equals nvec, else the clamped count */
+
+out_msi:
+ list_for_each_entry(msi, &pdev->msi_list, list) {
+ if (hwirq-- == 0)	/* stop after the hwirq entries that were fully set up above */
+ break;
+ irq_set_msi_desc(msi->irq, NULL);
+ irq_free_desc(msi->irq);
+ msi->msg.address_lo = 0;
+ msi->msg.address_hi = 0;
+ msi->msg.data = 0;
+ msi->irq = 0;
}
- return (zdev->irq_map->msi_vecs == msi_vecs) ?
- 0 : zdev->irq_map->msi_vecs;
+ zpci_aibv[aisb] = NULL;
+ airq_iv_release(zdev->aibv);
+out_si:
+ airq_iv_free_bit(zpci_aisb_iv, aisb);
+out:
+ dev_err(&pdev->dev, "register MSI failed with: %d\n", rc);	/* every goto-based failure path ends up here */
+ return rc;
}
-static void zpci_teardown_msi(struct pci_dev *pdev)
+void arch_teardown_msi_irqs(struct pci_dev *pdev)	/* release the MSI/MSI-X vectors of pdev; teardown is now done inline here */
{
struct zpci_dev *zdev = get_zdev(pdev);
struct msi_desc *msi;
- int aisb, rc;
+ int rc;
- rc = zpci_unregister_airq(zdev);
+ pr_info("%s: on pdev: %p\n", __func__, pdev);
+
+ /* Disable adapter interrupts */
+ rc = zpci_clear_airq(zdev);	/* on failure the function returns below without releasing anything */
if (rc) {
dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc);
return;
}
- msi = list_first_entry(&pdev->msi_list, struct msi_desc, list);
- aisb = irq_to_dev_nr(msi->irq);
-
- list_for_each_entry(msi, &pdev->msi_list, list)
- zpci_teardown_msi_irq(zdev, msi);
-
- clear_bit(aisb, bucket->alloc);
- if (aisb + 1 == aisb_max)
- aisb_max--;
-}
-
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
- pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
- if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
- return -EINVAL;
- return zpci_setup_msi(pdev, nvec);
-}
+ /* Release MSI interrupts */
+ list_for_each_entry(msi, &pdev->msi_list, list) {
+ zpci_msi_set_mask_bits(msi, 1, 1);	/* mask the vector before its irq descriptor is freed */
+ irq_set_msi_desc(msi->irq, NULL);
+ irq_free_desc(msi->irq);
+ msi->msg.address_lo = 0;
+ msi->msg.address_hi = 0;
+ msi->msg.data = 0;
+ msi->irq = 0;	/* cached message and irq number in the descriptor are cleared */
+ }
-void arch_teardown_msi_irqs(struct pci_dev *pdev)
-{
- pr_info("%s: on pdev: %p\n", __func__, pdev);
- zpci_teardown_msi(pdev);
+ zpci_aibv[zdev->aisb] = NULL;	/* drop the shortcut pointer read by zpci_irq_handler() */
+ airq_iv_release(zdev->aibv);
+ airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);	/* hand the summary bit back to the shared summary vector */
}
static void zpci_map_resources(struct zpci_dev *zdev)
@@ -564,8 +536,6 @@ static void zpci_map_resources(struct zpci_dev *zdev)
continue;
pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0);
pdev->resource[i].end = pdev->resource[i].start + len - 1;
- pr_debug("BAR%i: -> start: %Lx end: %Lx\n",
- i, pdev->resource[i].start, pdev->resource[i].end);
}
}
@@ -589,162 +559,47 @@ struct zpci_dev *zpci_alloc_device(void)
/* Alloc memory for our private pci device data */
zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
- if (!zdev)
- return ERR_PTR(-ENOMEM);
-
- /* Alloc aibv & callback space */
- zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL);
- if (!zdev->irq_map)
- goto error;
- WARN_ON((u64) zdev->irq_map & 0xff);
- return zdev;
-
-error:
- kfree(zdev);
- return ERR_PTR(-ENOMEM);
+ return zdev ? : ERR_PTR(-ENOMEM);
}
void zpci_free_device(struct zpci_dev *zdev)
{
- kmem_cache_free(zdev_irq_cache, zdev->irq_map);
kfree(zdev);
}
-/*
- * Too late for any s390 specific setup, since interrupts must be set up
- * already which requires DMA setup too and the pci scan will access the
- * config space, which only works if the function handle is enabled.
- */
-int pcibios_enable_device(struct pci_dev *pdev, int mask)
-{
- struct resource *res;
- u16 cmd;
- int i;
-
- pci_read_config_word(pdev, PCI_COMMAND, &cmd);
-
- for (i = 0; i < PCI_BAR_COUNT; i++) {
- res = &pdev->resource[i];
-
- if (res->flags & IORESOURCE_IO)
- return -EINVAL;
-
- if (res->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
- pci_write_config_word(pdev, PCI_COMMAND, cmd);
- return 0;
-}
-
int pcibios_add_platform_entries(struct pci_dev *pdev)
{
return zpci_sysfs_add_device(&pdev->dev);
}
-int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data)
-{
- int msi_nr = irq_to_msi_nr(irq);
- struct zdev_irq_map *imap;
- struct msi_desc *msi;
-
- msi = irq_get_msi_desc(irq);
- if (!msi)
- return -EIO;
-
- imap = get_imap(irq);
- spin_lock_init(&imap->lock);
-
- pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr);
- imap->cb[msi_nr].handler = handler;
- imap->cb[msi_nr].data = data;
-
- /*
- * The generic MSI code returns with the interrupt disabled on the
- * card, using the MSI mask bits. Firmware doesn't appear to unmask
- * at that level, so we do it here by hand.
- */
- zpci_msi_set_mask_bits(msi, 1, 0);
- return 0;
-}
-
-void zpci_free_irq(unsigned int irq)
-{
- struct zdev_irq_map *imap = get_imap(irq);
- int msi_nr = irq_to_msi_nr(irq);
- unsigned long flags;
-
- pr_debug("%s: for irq: %d\n", __func__, irq);
-
- spin_lock_irqsave(&imap->lock, flags);
- imap->cb[msi_nr].handler = NULL;
- imap->cb[msi_nr].data = NULL;
- spin_unlock_irqrestore(&imap->lock, flags);
-}
-
-int request_irq(unsigned int irq, irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
-{
- pr_debug("%s: irq: %d handler: %p flags: %lx dev: %s\n",
- __func__, irq, handler, irqflags, devname);
-
- return zpci_request_irq(irq, handler, dev_id);
-}
-EXPORT_SYMBOL_GPL(request_irq);
-
-void free_irq(unsigned int irq, void *dev_id)
-{
- zpci_free_irq(irq);
-}
-EXPORT_SYMBOL_GPL(free_irq);
-
static int __init zpci_irq_init(void)
{
- int cpu, rc;
-
- bucket = kzalloc(sizeof(*bucket), GFP_KERNEL);
- if (!bucket)
- return -ENOMEM;
-
- bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL);
- if (!bucket->aisb) {
- rc = -ENOMEM;
- goto out_aisb;
- }
-
- bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL);
- if (!bucket->alloc) {
- rc = -ENOMEM;
- goto out_alloc;
- }
+ int rc;
rc = register_adapter_interrupt(&zpci_airq);
if (rc)
- goto out_ai;
+ goto out;	/* registration is now the first step, so there is nothing to undo */
/* Set summary to 1 to be called every time for the ISC. */
*zpci_airq.lsi_ptr = 1;
- for_each_online_cpu(cpu)
- per_cpu(next_sbit, cpu) = 0;
+ rc = -ENOMEM;
+ zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);	/* summary vector sized ZPCI_NR_DEVICES; bits are handed out via airq_iv_alloc_bit() */
+ if (!zpci_aisb_iv)
+ goto out_airq;
- spin_lock_init(&bucket->lock);
- set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);	/* same call zpci_irq_handler() issues to re-enable interrupts */
return 0;
-out_ai:
- free_page((unsigned long) bucket->alloc);
-out_alloc:
- free_page((unsigned long) bucket->aisb);
-out_aisb:
- kfree(bucket);
+out_airq:
+ unregister_adapter_interrupt(&zpci_airq);	/* undo the registration when the summary vector cannot be created */
+out:
return rc;
}
static void zpci_irq_exit(void)
{
- free_page((unsigned long) bucket->alloc);
- free_page((unsigned long) bucket->aisb);
+ airq_iv_release(zpci_aisb_iv);	/* releases the summary vector created in zpci_irq_init() */
unregister_adapter_interrupt(&zpci_airq);
- kfree(bucket);
}
static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size,
@@ -801,16 +656,49 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
int pcibios_add_device(struct pci_dev *pdev)
{
struct zpci_dev *zdev = get_zdev(pdev);
+ struct resource *res;
+ int i;
+
+ zdev->pdev = pdev;
+ zpci_map_resources(zdev);	/* NOTE(review): pcibios_enable_device() calls zpci_map_resources() as well - confirm the repeat is intended */
+
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ res = &pdev->resource[i];
+ if (res->parent || !res->flags)	/* skip BARs that already have a parent or have no flags set */
+ continue;
+ pci_claim_resource(pdev, i);	/* return value is not checked */
+ }
+
+ return 0;	/* always reports success */
+}
+
+int pcibios_enable_device(struct pci_dev *pdev, int mask)	/* mask is not referenced in this body */
+{
+ struct zpci_dev *zdev = get_zdev(pdev);
+ struct resource *res;
+ u16 cmd;
+ int i;
zdev->pdev = pdev;
zpci_debug_init_device(zdev);
zpci_fmb_enable_device(zdev);
zpci_map_resources(zdev);
+ pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ res = &pdev->resource[i];
+
+ if (res->flags & IORESOURCE_IO)
+ return -EINVAL;	/* a BAR flagged IORESOURCE_IO is rejected before the command word is written back */
+
+ if (res->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;	/* set the memory enable bit when any BAR is a memory resource */
+ }
+ pci_write_config_word(pdev, PCI_COMMAND, cmd);
return 0;
}
-void pcibios_release_device(struct pci_dev *pdev)
+void pcibios_disable_device(struct pci_dev *pdev)
{
struct zpci_dev *zdev = get_zdev(pdev);
@@ -898,6 +786,8 @@ int zpci_enable_device(struct zpci_dev *zdev)
rc = zpci_dma_init_device(zdev);
if (rc)
goto out_dma;
+
+ zdev->state = ZPCI_FN_STATE_ONLINE;
return 0;
out_dma:
@@ -926,18 +816,16 @@ int zpci_create_device(struct zpci_dev *zdev)
rc = zpci_enable_device(zdev);
if (rc)
goto out_free;
-
- zdev->state = ZPCI_FN_STATE_ONLINE;
}
rc = zpci_scan_bus(zdev);
if (rc)
goto out_disable;
- mutex_lock(&zpci_list_lock);
+ spin_lock(&zpci_list_lock);
list_add_tail(&zdev->entry, &zpci_list);
- if (hotplug_ops)
- hotplug_ops->create_slot(zdev);
- mutex_unlock(&zpci_list_lock);
+ spin_unlock(&zpci_list_lock);
+
+ zpci_init_slot(zdev);
return 0;
@@ -967,15 +855,10 @@ static inline int barsize(u8 size)
static int zpci_mem_init(void)
{
- zdev_irq_cache = kmem_cache_create("PCI_IRQ_cache", sizeof(struct zdev_irq_map),
- L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, NULL);
- if (!zdev_irq_cache)
- goto error_zdev;
-
zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
16, 0, NULL);
if (!zdev_fmb_cache)
- goto error_fmb;
+ goto error_zdev;
/* TODO: use realloc */
zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
@@ -986,8 +869,6 @@ static int zpci_mem_init(void)
error_iomap:
kmem_cache_destroy(zdev_fmb_cache);
-error_fmb:
- kmem_cache_destroy(zdev_irq_cache);
error_zdev:
return -ENOMEM;
}
@@ -995,28 +876,10 @@ error_zdev:
static void zpci_mem_exit(void)
{
kfree(zpci_iomap_start);
- kmem_cache_destroy(zdev_irq_cache);
kmem_cache_destroy(zdev_fmb_cache);
}
-void zpci_register_hp_ops(struct pci_hp_callback_ops *ops)
-{
- mutex_lock(&zpci_list_lock);
- hotplug_ops = ops;
- mutex_unlock(&zpci_list_lock);
-}
-EXPORT_SYMBOL_GPL(zpci_register_hp_ops);
-
-void zpci_deregister_hp_ops(void)
-{
- mutex_lock(&zpci_list_lock);
- hotplug_ops = NULL;
- mutex_unlock(&zpci_list_lock);
-}
-EXPORT_SYMBOL_GPL(zpci_deregister_hp_ops);
-
-unsigned int s390_pci_probe;
-EXPORT_SYMBOL_GPL(s390_pci_probe);
+static unsigned int s390_pci_probe;
char * __init pcibios_setup(char *str)
{
@@ -1044,16 +907,12 @@ static int __init pci_base_init(void)
rc = zpci_debug_init();
if (rc)
- return rc;
+ goto out;
rc = zpci_mem_init();
if (rc)
goto out_mem;
- rc = zpci_msihash_init();
- if (rc)
- goto out_hash;
-
rc = zpci_irq_init();
if (rc)
goto out_irq;
@@ -1062,7 +921,7 @@ static int __init pci_base_init(void)
if (rc)
goto out_dma;
- rc = clp_find_pci_devices();
+ rc = clp_scan_pci_devices();
if (rc)
goto out_find;
@@ -1073,11 +932,15 @@ out_find:
out_dma:
zpci_irq_exit();
out_irq:
- zpci_msihash_exit();
-out_hash:
zpci_mem_exit();
out_mem:
zpci_debug_exit();
+out:
return rc;
}
-subsys_initcall(pci_base_init);
+subsys_initcall_sync(pci_base_init);
+
+void zpci_rescan(void)	/* wrapper around the "simple" CLP rescan, which only refreshes fh of known devices */
+{
+ clp_rescan_pci_devices_simple();	/* its return value is dropped */
+}
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 2e9539625d93..475563c3d1e4 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -36,9 +36,9 @@ static inline u8 clp_instr(void *data)
return cc;
}
-static void *clp_alloc_block(void)
+static void *clp_alloc_block(gfp_t gfp_mask)	/* allocation flags are now chosen by the caller */
{
- return (void *) __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE));
+ return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE));	/* page order derived from CLP_BLK_SIZE; callers check the result for NULL */
}
static void clp_free_block(void *ptr)
@@ -70,7 +70,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
struct clp_req_rsp_query_pci_grp *rrb;
int rc;
- rrb = clp_alloc_block();
+ rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
@@ -113,7 +113,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
struct clp_req_rsp_query_pci *rrb;
int rc;
- rrb = clp_alloc_block();
+ rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
@@ -179,9 +179,9 @@ error:
static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
{
struct clp_req_rsp_set_pci *rrb;
- int rc, retries = 1000;
+ int rc, retries = 100;
- rrb = clp_alloc_block();
+ rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
@@ -199,7 +199,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
retries--;
if (retries < 0)
break;
- msleep(1);
+ msleep(20);
}
} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
@@ -245,49 +245,12 @@ int clp_disable_fh(struct zpci_dev *zdev)
return rc;
}
-static void clp_check_pcifn_entry(struct clp_fh_list_entry *entry)
+static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
+ void (*cb)(struct clp_fh_list_entry *entry))
{
- int present, rc;
-
- if (!entry->vendor_id)
- return;
-
- /* TODO: be a little bit more scalable */
- present = zpci_fid_present(entry->fid);
-
- if (present)
- pr_debug("%s: device %x already present\n", __func__, entry->fid);
-
- /* skip already used functions */
- if (present && entry->config_state)
- return;
-
- /* aev 306: function moved to stand-by state */
- if (present && !entry->config_state) {
- /*
- * The handle is already disabled, that means no iota/irq freeing via
- * the firmware interfaces anymore. Need to free resources manually
- * (DMA memory, debug, sysfs)...
- */
- zpci_stop_device(get_zdev_by_fid(entry->fid));
- return;
- }
-
- rc = clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
- if (rc)
- pr_err("Failed to add fid: 0x%x\n", entry->fid);
-}
-
-int clp_find_pci_devices(void)
-{
- struct clp_req_rsp_list_pci *rrb;
u64 resume_token = 0;
int entries, i, rc;
- rrb = clp_alloc_block();
- if (!rrb)
- return -ENOMEM;
-
do {
memset(rrb, 0, sizeof(*rrb));
rrb->request.hdr.len = sizeof(rrb->request);
@@ -316,12 +279,101 @@ int clp_find_pci_devices(void)
resume_token = rrb->response.resume_token;
for (i = 0; i < entries; i++)
- clp_check_pcifn_entry(&rrb->response.fh_list[i]);
+ cb(&rrb->response.fh_list[i]);
} while (resume_token);
pr_debug("Maximum number of supported PCI functions: %u\n",
rrb->response.max_fn);
out:
+ return rc;
+}
+
+static void __clp_add(struct clp_fh_list_entry *entry)	/* clp_list_pci() callback used by clp_scan_pci_devices() */
+{
+ if (!entry->vendor_id)	/* entries with a zero vendor id are ignored */
+ return;
+
+ clp_add_pci_device(entry->fid, entry->fh, entry->config_state);	/* result is not checked here */
+}
+
+static void __clp_rescan(struct clp_fh_list_entry *entry)	/* clp_list_pci() callback used by clp_rescan_pci_devices() */
+{
+ struct zpci_dev *zdev;
+
+ if (!entry->vendor_id)	/* entries with a zero vendor id are ignored */
+ return;
+
+ zdev = get_zdev_by_fid(entry->fid);
+ if (!zdev) {	/* fid not in zpci_list yet: add it as a new device */
+ clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
+ return;
+ }
+
+ if (!entry->config_state) {	/* known device whose list entry reports config_state == 0 */
+ /*
+ * The handle is already disabled, that means no iota/irq freeing via
+ * the firmware interfaces anymore. Need to free resources manually
+ * (DMA memory, debug, sysfs)...
+ */
+ zpci_stop_device(zdev);
+ }
+}
+
+static void __clp_update(struct clp_fh_list_entry *entry)	/* clp_list_pci() callback used by clp_rescan_pci_devices_simple() */
+{
+ struct zpci_dev *zdev;
+
+ if (!entry->vendor_id)	/* entries with a zero vendor id are ignored */
+ return;
+
+ zdev = get_zdev_by_fid(entry->fid);
+ if (!zdev)	/* unknown functions are left alone; this callback never adds devices */
+ return;
+
+ zdev->fh = entry->fh;	/* only the function handle of a known device is refreshed */
+}
+
+int clp_scan_pci_devices(void)	/* list all PCI functions and run __clp_add on each list entry */
+{
+ struct clp_req_rsp_list_pci *rrb;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_KERNEL);	/* request/response block handed to clp_list_pci() */
+ if (!rrb)
+ return -ENOMEM;
+
+ rc = clp_list_pci(rrb, __clp_add);
+
+ clp_free_block(rrb);	/* the block is freed whatever clp_list_pci() returned */
+ return rc;	/* -ENOMEM above, otherwise the clp_list_pci() result */
+}
+
+int clp_rescan_pci_devices(void)	/* list all PCI functions and run __clp_rescan on each list entry */
+{
+ struct clp_req_rsp_list_pci *rrb;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_KERNEL);	/* request/response block handed to clp_list_pci() */
+ if (!rrb)
+ return -ENOMEM;
+
+ rc = clp_list_pci(rrb, __clp_rescan);
+
+ clp_free_block(rrb);	/* the block is freed whatever clp_list_pci() returned */
+ return rc;	/* -ENOMEM above, otherwise the clp_list_pci() result */
+}
+
+int clp_rescan_pci_devices_simple(void)	/* list all PCI functions and run __clp_update on each list entry */
+{
+ struct clp_req_rsp_list_pci *rrb;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_NOWAIT);	/* GFP_NOWAIT here, unlike the GFP_KERNEL used by the other two scan entry points */
+ if (!rrb)
+ return -ENOMEM;
+
+ rc = clp_list_pci(rrb, __clp_update);
+
clp_free_block(rrb);
return rc;
}
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index a2343c1f6e04..7e5573acb063 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -10,6 +10,7 @@
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>
@@ -170,8 +171,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
*/
goto no_refresh;
- rc = s390pci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
- nr_pages * PAGE_SIZE);
+ rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
+ nr_pages * PAGE_SIZE);
no_refresh:
spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
@@ -407,7 +408,6 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
int zpci_dma_init_device(struct zpci_dev *zdev)
{
- unsigned int bitmap_order;
int rc;
spin_lock_init(&zdev->iommu_bitmap_lock);
@@ -421,12 +421,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET;
zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
- bitmap_order = get_order(zdev->iommu_pages / 8);
- pr_info("iommu_size: 0x%lx iommu_pages: 0x%lx bitmap_order: %i\n",
- zdev->iommu_size, zdev->iommu_pages, bitmap_order);
-
- zdev->iommu_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
- bitmap_order);
+ zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
if (!zdev->iommu_bitmap) {
rc = -ENOMEM;
goto out_reg;
@@ -451,8 +446,7 @@ void zpci_dma_exit_device(struct zpci_dev *zdev)
{
zpci_unregister_ioat(zdev, 0);
dma_cleanup_tables(zdev);
- free_pages((unsigned long) zdev->iommu_bitmap,
- get_order(zdev->iommu_pages / 8));
+ vfree(zdev->iommu_bitmap);
zdev->iommu_bitmap = NULL;
zdev->next_bit = 0;
}
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index ec62e3a0dc09..0aecaf954845 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -69,7 +69,7 @@ static void zpci_event_log_avail(struct zpci_ccdf_avail *ccdf)
clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
break;
case 0x0306:
- clp_find_pci_devices();
+ clp_rescan_pci_devices();
break;
default:
break;
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 22eeb9d7ffeb..85267c058af8 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -27,7 +27,7 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
return cc;
}
-int s390pci_mod_fc(u64 req, struct zpci_fib *fib)
+int zpci_mod_fc(u64 req, struct zpci_fib *fib)
{
u8 cc, status;
@@ -61,7 +61,7 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
return cc;
}
-int s390pci_refresh_trans(u64 fn, u64 addr, u64 range)
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
{
u8 cc, status;
@@ -78,7 +78,7 @@ int s390pci_refresh_trans(u64 fn, u64 addr, u64 range)
}
/* Set Interruption Controls */
-void set_irq_ctrl(u16 ctl, char *unused, u8 isc)
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
{
asm volatile (
" .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
@@ -109,7 +109,7 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
return cc;
}
-int s390pci_load(u64 *data, u64 req, u64 offset)
+int zpci_load(u64 *data, u64 req, u64 offset)
{
u8 status;
int cc;
@@ -125,7 +125,7 @@ int s390pci_load(u64 *data, u64 req, u64 offset)
__func__, cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
-EXPORT_SYMBOL_GPL(s390pci_load);
+EXPORT_SYMBOL_GPL(zpci_load);
/* PCI Store */
static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
@@ -147,7 +147,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
return cc;
}
-int s390pci_store(u64 data, u64 req, u64 offset)
+int zpci_store(u64 data, u64 req, u64 offset)
{
u8 status;
int cc;
@@ -163,7 +163,7 @@ int s390pci_store(u64 data, u64 req, u64 offset)
__func__, cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
-EXPORT_SYMBOL_GPL(s390pci_store);
+EXPORT_SYMBOL_GPL(zpci_store);
/* PCI Store Block */
static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
@@ -183,7 +183,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
return cc;
}
-int s390pci_store_block(const u64 *data, u64 req, u64 offset)
+int zpci_store_block(const u64 *data, u64 req, u64 offset)
{
u8 status;
int cc;
@@ -199,4 +199,4 @@ int s390pci_store_block(const u64 *data, u64 req, u64 offset)
__func__, cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
-EXPORT_SYMBOL_GPL(s390pci_store_block);
+EXPORT_SYMBOL_GPL(zpci_store_block);
diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c
deleted file mode 100644
index b097aed05a9b..000000000000
--- a/arch/s390/pci/pci_msi.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright IBM Corp. 2012
- *
- * Author(s):
- * Jan Glauber <jang@linux.vnet.ibm.com>
- */
-
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/rculist.h>
-#include <linux/hash.h>
-#include <linux/pci.h>
-#include <linux/msi.h>
-#include <asm/hw_irq.h>
-
-/* mapping of irq numbers to msi_desc */
-static struct hlist_head *msi_hash;
-static const unsigned int msi_hash_bits = 8;
-#define MSI_HASH_BUCKETS (1U << msi_hash_bits)
-#define msi_hashfn(nr) hash_long(nr, msi_hash_bits)
-
-static DEFINE_SPINLOCK(msi_map_lock);
-
-struct msi_desc *__irq_get_msi_desc(unsigned int irq)
-{
- struct msi_map *map;
-
- hlist_for_each_entry_rcu(map,
- &msi_hash[msi_hashfn(irq)], msi_chain)
- if (map->irq == irq)
- return map->msi;
- return NULL;
-}
-
-int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag)
-{
- if (msi->msi_attrib.is_msix) {
- int offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_VECTOR_CTRL;
- msi->masked = readl(msi->mask_base + offset);
- writel(flag, msi->mask_base + offset);
- } else {
- if (msi->msi_attrib.maskbit) {
- int pos;
- u32 mask_bits;
-
- pos = (long) msi->mask_base;
- pci_read_config_dword(msi->dev, pos, &mask_bits);
- mask_bits &= ~(mask);
- mask_bits |= flag & mask;
- pci_write_config_dword(msi->dev, pos, mask_bits);
- } else {
- return 0;
- }
- }
-
- msi->msi_attrib.maskbit = !!flag;
- return 1;
-}
-
-int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi,
- unsigned int nr, int offset)
-{
- struct msi_map *map;
- struct msi_msg msg;
- int rc;
-
- map = kmalloc(sizeof(*map), GFP_KERNEL);
- if (map == NULL)
- return -ENOMEM;
-
- map->irq = nr;
- map->msi = msi;
- zdev->msi_map[nr & ZPCI_MSI_MASK] = map;
- INIT_HLIST_NODE(&map->msi_chain);
-
- pr_debug("%s hashing irq: %u to bucket nr: %llu\n",
- __func__, nr, msi_hashfn(nr));
- hlist_add_head_rcu(&map->msi_chain, &msi_hash[msi_hashfn(nr)]);
-
- spin_lock(&msi_map_lock);
- rc = irq_set_msi_desc(nr, msi);
- if (rc) {
- spin_unlock(&msi_map_lock);
- hlist_del_rcu(&map->msi_chain);
- kfree(map);
- zdev->msi_map[nr & ZPCI_MSI_MASK] = NULL;
- return rc;
- }
- spin_unlock(&msi_map_lock);
-
- msg.data = nr - offset;
- msg.address_lo = zdev->msi_addr & 0xffffffff;
- msg.address_hi = zdev->msi_addr >> 32;
- write_msi_msg(nr, &msg);
- return 0;
-}
-
-void zpci_teardown_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi)
-{
- int irq = msi->irq & ZPCI_MSI_MASK;
- struct msi_map *map;
-
- msi->msg.address_lo = 0;
- msi->msg.address_hi = 0;
- msi->msg.data = 0;
- msi->irq = 0;
- zpci_msi_set_mask_bits(msi, 1, 1);
-
- spin_lock(&msi_map_lock);
- map = zdev->msi_map[irq];
- hlist_del_rcu(&map->msi_chain);
- kfree(map);
- zdev->msi_map[irq] = NULL;
- spin_unlock(&msi_map_lock);
-}
-
-/*
- * The msi hash table has 256 entries which is good for 4..20
- * devices (a typical device allocates 10 + CPUs MSI's). Maybe make
- * the hash table size adjustable later.
- */
-int __init zpci_msihash_init(void)
-{
- unsigned int i;
-
- msi_hash = kmalloc(MSI_HASH_BUCKETS * sizeof(*msi_hash), GFP_KERNEL);
- if (!msi_hash)
- return -ENOMEM;
-
- for (i = 0; i < MSI_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&msi_hash[i]);
- return 0;
-}
-
-void __init zpci_msihash_exit(void)
-{
- kfree(msi_hash);
-}
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index e99a2557f186..cf8a12ff733b 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -48,11 +48,38 @@ static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL);
+static void recover_callback(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = get_zdev(pdev);
+ int ret;
+
+ pci_stop_and_remove_bus_device(pdev);
+ ret = zpci_disable_device(zdev);
+ if (ret)
+ return;
+
+ ret = zpci_enable_device(zdev);
+ if (ret)
+ return;
+
+ pci_rescan_bus(zdev->bus);
+}
+
+static ssize_t store_recover(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int rc = device_schedule_callback(dev, recover_callback);
+ return rc ? rc : count;
+}
+static DEVICE_ATTR(recover, S_IWUSR, NULL, store_recover);
+
static struct device_attribute *zpci_dev_attrs[] = {
&dev_attr_function_id,
&dev_attr_function_handle,
&dev_attr_pchid,
&dev_attr_pfgid,
+ &dev_attr_recover,
NULL,
};