author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2016-03-08 12:12:18 +0100
committer | Christian Borntraeger <borntraeger@de.ibm.com> | 2016-06-20 09:54:04 +0200
commit | 4be130a08420d6918d80c1067f8078f425eb98df (patch)
tree | c3e323bf6597eea8e588586550e378acf7652ce2 /arch/s390/mm/pgtable.c
parent | s390/mm: add reference counter to gmap structure (diff)
download | linux-4be130a08420d6918d80c1067f8078f425eb98df.tar.xz, linux-4be130a08420d6918d80c1067f8078f425eb98df.zip
s390/mm: add shadow gmap support
For a nested KVM guest the outer KVM host needs to create shadow
page tables for the nested guest. This patch adds the basic support
to the guest address space (gmap) code.
For each guest address space the inner KVM host creates, the outer KVM host
needs to create shadow page tables. The address space is identified by the
ASCE loaded into control register 1 at the time the inner SIE instruction
for the second, nested KVM guest is executed. The outer KVM host creates
the shadow tables on an on-demand basis, starting with the table identified
by the ASCE, and will get repeated faults for all the shadow tables needed
to run the second KVM guest.
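This fault-driven construction can be pictured with a small, self-contained C sketch. Everything in it (the table layout, the shadow_fault() helper, the pool allocator) is invented for illustration and is not code from this patch; it only mirrors the idea that each fault materializes at most one missing shadow level, so the outer host takes several faults before the second guest can run.

```c
#include <stdio.h>

#define ENTRIES 4

struct table {
	struct table *next[ENTRIES];	/* lower-level tables, NULL if absent */
};

/* origin tables owned by the first (inner) KVM host */
static struct table g1_top, g1_seg;

/* shadow tables built lazily by the outer KVM host */
static struct table *shadow_top;

static struct table shadow_pool[8];	/* trivial allocator for the sketch */
static unsigned int shadow_used;

/*
 * One "fault" taken by the outer host: create at most one missing shadow
 * level and return -1 (think -EAGAIN) so the caller re-enters the guest.
 */
static int shadow_fault(struct table *origin, unsigned int idx)
{
	if (!shadow_top) {
		shadow_top = &shadow_pool[shadow_used++];
		return -1;
	}
	if (!origin->next[idx])
		return -2;	/* not mapped by the first guest at all */
	if (!shadow_top->next[idx]) {
		/* shadow the origin table; the real code also write-protects it */
		shadow_top->next[idx] = &shadow_pool[shadow_used++];
		return -1;
	}
	return 0;		/* translation complete, the second guest can run */
}

int main(void)
{
	int faults = 0;

	g1_top.next[1] = &g1_seg;	/* the first guest maps segment 1 */
	while (shadow_fault(&g1_top, 1) == -1)
		faults++;
	printf("shadow mapping ready after %d faults\n", faults);
	return 0;
}
```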
While a shadow page table for the second KVM guest is active, access to the
origin region, segment and page tables needs to be restricted for the first
KVM guest. For region, segment and page tables the first KVM guest may read
the memory, but a write attempt has to lead to an unshadow. This is done
using the page invalid and read-only bits in the page table of the first
KVM guest. If the first guest re-accesses one of the origin pages of a
shadow, it gets a fault and the affected parts of the shadow page table
hierarchy need to be removed again.
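A rough user-space sketch of this protect-and-notify scheme follows; the bit values and helper names (PTE_PROTECT, PGSTE_VSIE, protect_for_shadow(), guest1_write()) are made up for illustration and are not the kernel's definitions. In the patch itself the same flow is carried by ptep_force_prot() setting _PAGE_PROTECT plus PGSTE_VSIE_BIT, and by pgste_pte_notify() forwarding the bit to ptep_notify().

```c
#include <stdio.h>
#include <stdbool.h>

#define PTE_PROTECT	0x1UL	/* origin page is write-protected (hardware bit) */
#define PGSTE_VSIE	0x2UL	/* a shadow table references this page (software bit) */

struct entry {
	unsigned long pte;	/* bits visible to the guest's translation */
	unsigned long pgste;	/* software status, invisible to the guest */
};

static bool shadow_valid = true;

/* write-protect the origin page and remember why via the notification bit */
static void protect_for_shadow(struct entry *e)
{
	e->pte |= PTE_PROTECT;
	e->pgste |= PGSTE_VSIE;
}

/* the first guest writes to the page: protection fault -> notify -> unshadow */
static void guest1_write(struct entry *e)
{
	if (!(e->pte & PTE_PROTECT))
		return;			/* no fault, nothing to do */
	if (e->pgste & PGSTE_VSIE) {
		e->pgste &= ~PGSTE_VSIE;
		shadow_valid = false;	/* affected shadow tables are torn down */
	}
	e->pte &= ~PTE_PROTECT;		/* let the write proceed afterwards */
}

int main(void)
{
	struct entry origin = { 0, 0 };

	protect_for_shadow(&origin);
	guest1_write(&origin);
	printf("shadow still valid: %s\n", shadow_valid ? "yes" : "no");
	return 0;
}
```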
PGSTE tables don't have to be shadowed, as none of the interpretation assists
can deal with the invalid bits in the shadow pte being set differently than
in the original ones provided by the first KVM guest.
Many bug fixes and improvements by David Hildenbrand.
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Diffstat (limited to 'arch/s390/mm/pgtable.c')
-rw-r--r-- | arch/s390/mm/pgtable.c | 57
1 file changed, 51 insertions, 6 deletions
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ab65fb11e058..5b02583fbf4c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -184,9 +184,12 @@ static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
 				       pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
-	if (pgste_val(pgste) & PGSTE_IN_BIT) {
-		pgste_val(pgste) &= ~PGSTE_IN_BIT;
-		ptep_notify(mm, addr, ptep);
+	unsigned long bits;
+
+	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
+	if (bits) {
+		pgste_val(pgste) ^= bits;
+		ptep_notify(mm, addr, ptep, bits);
 	}
 #endif
 	return pgste;
@@ -420,12 +423,13 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
  * @addr: virtual address in the guest address space
  * @ptep: pointer to the page table entry
  * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bit: pgste bit to set (e.g. for notification)
  *
  * Returns 0 if the access rights were changed and -EAGAIN if the current
  * and requested access rights are incompatible.
  */
 int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
-		    pte_t *ptep, int prot)
+		    pte_t *ptep, int prot, unsigned long bit)
 {
 	pte_t entry;
 	pgste_t pgste;
@@ -441,7 +445,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
 		pgste_set_unlock(ptep, pgste);
 		return -EAGAIN;
 	}
-	/* Change access rights and set the pgste notification bit */
+	/* Change access rights and set pgste bit */
 	if (prot == PROT_NONE && !pte_i) {
 		ptep_flush_direct(mm, addr, ptep);
 		pgste = pgste_update_all(entry, pgste, mm);
@@ -452,12 +456,53 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
 		pte_val(entry) &= ~_PAGE_INVALID;
 		pte_val(entry) |= _PAGE_PROTECT;
 	}
-	pgste_val(pgste) |= PGSTE_IN_BIT;
+	pgste_val(pgste) |= bit;
 	pgste = pgste_set_pte(ptep, pgste, entry);
 	pgste_set_unlock(ptep, pgste);
 	return 0;
 }
 
+int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
+		    pte_t *sptep, pte_t *tptep, int write)
+{
+	pgste_t spgste, tpgste;
+	pte_t spte, tpte;
+	int rc = -EAGAIN;
+
+	spgste = pgste_get_lock(sptep);
+	spte = *sptep;
+	if (!(pte_val(spte) & _PAGE_INVALID) &&
+	    !(pte_val(spte) & _PAGE_PROTECT)) {
+		rc = 0;
+		if (!(pte_val(*tptep) & _PAGE_INVALID))
+			/* Update existing mapping */
+			ptep_flush_direct(mm, saddr, tptep);
+		else
+			rc = 1;
+		pgste_val(spgste) |= PGSTE_VSIE_BIT;
+		tpgste = pgste_get_lock(tptep);
+		pte_val(tpte) = (pte_val(spte) & PAGE_MASK) |
+				(write ? 0 : _PAGE_PROTECT);
+		/* don't touch the storage key - it belongs to parent pgste */
+		tpgste = pgste_set_pte(tptep, tpgste, tpte);
+		pgste_set_unlock(tptep, tpgste);
+	}
+	pgste_set_unlock(sptep, spgste);
+	return rc;
+}
+
+void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
+{
+	pgste_t pgste;
+
+	pgste = pgste_get_lock(ptep);
+	/* notifier is called by the caller */
+	ptep_flush_direct(mm, saddr, ptep);
+	/* don't touch the storage key - it belongs to parent pgste */
+	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
+	pgste_set_unlock(ptep, pgste);
+}
+
 static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
 {
 	if (!non_swap_entry(entry))
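A note on the two new helpers: ptep_shadow_pte() returns -EAGAIN while the parent (source) pte is still invalid or write-protected, 0 when an existing shadow mapping was updated (after flushing it), and 1 when a mapping was established in a previously invalid shadow pte. ptep_unshadow_pte() only invalidates the shadow pte and, per its comment, leaves the notifier call to its caller; both helpers deliberately leave the storage key alone, since it belongs to the parent pgste.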