diff options
author | Will Deacon <will@kernel.org> | 2020-10-02 13:16:11 +0200 |
---|---|---|
committer | Will Deacon <will@kernel.org> | 2020-10-02 13:16:11 +0200 |
commit | baab853229ec1f291cec6a70ed61ce93159d0997 (patch) | |
tree | 0da235bb4a50e32d80a5d703eea2a56626113cc1 | |
parent | Merge branch 'for-next/ghostbusters' into for-next/core (diff) | |
parent | arm64: mte: Fix typo in memory tagging ABI documentation (diff) | |
download | linux-baab853229ec1f291cec6a70ed61ce93159d0997.tar.xz linux-baab853229ec1f291cec6a70ed61ce93159d0997.zip |
Merge branch 'for-next/mte' into for-next/core
Add userspace support for the Memory Tagging Extension introduced by
Armv8.5.
(Catalin Marinas and others)
* for-next/mte: (30 commits)
arm64: mte: Fix typo in memory tagging ABI documentation
arm64: mte: Add Memory Tagging Extension documentation
arm64: mte: Kconfig entry
arm64: mte: Save tags when hibernating
arm64: mte: Enable swap of tagged pages
mm: Add arch hooks for saving/restoring tags
fs: Handle intra-page faults in copy_mount_options()
arm64: mte: ptrace: Add NT_ARM_TAGGED_ADDR_CTRL regset
arm64: mte: ptrace: Add PTRACE_{PEEK,POKE}MTETAGS support
arm64: mte: Allow {set,get}_tagged_addr_ctrl() on non-current tasks
arm64: mte: Restore the GCR_EL1 register after a suspend
arm64: mte: Allow user control of the generated random tags via prctl()
arm64: mte: Allow user control of the tag check mode via prctl()
mm: Allow arm64 mmap(PROT_MTE) on RAM-based files
arm64: mte: Validate the PROT_MTE request via arch_validate_flags()
mm: Introduce arch_validate_flags()
arm64: mte: Add PROT_MTE support to mmap() and mprotect()
mm: Introduce arch_calc_vm_flag_bits()
arm64: mte: Tags-aware aware memcmp_pages() implementation
arm64: Avoid unnecessary clear_user_page() indirection
...
63 files changed, 1764 insertions, 61 deletions
diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index f28853f80089..328e0c454fbd 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -175,6 +175,8 @@ infrastructure: +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ + | MTE | [11-8] | y | + +------------------------------+---------+---------+ | SSBS | [7-4] | y | +------------------------------+---------+---------+ | BT | [3-0] | y | diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 84a9fd2d41b4..bbd9cf54db6c 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -240,6 +240,10 @@ HWCAP2_BTI Functionality implied by ID_AA64PFR0_EL1.BT == 0b0001. +HWCAP2_MTE + + Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described + by Documentation/arm64/memory-tagging-extension.rst. 4. Unused AT_HWCAP bits ----------------------- diff --git a/Documentation/arm64/index.rst b/Documentation/arm64/index.rst index d9665d83c53a..43b0939d384e 100644 --- a/Documentation/arm64/index.rst +++ b/Documentation/arm64/index.rst @@ -14,6 +14,7 @@ ARM64 Architecture hugetlbpage legacy_instructions memory + memory-tagging-extension perf pointer-authentication silicon-errata diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst new file mode 100644 index 000000000000..034d37c605e8 --- /dev/null +++ b/Documentation/arm64/memory-tagging-extension.rst @@ -0,0 +1,305 @@ +=============================================== +Memory Tagging Extension (MTE) in AArch64 Linux +=============================================== + +Authors: Vincenzo Frascino <vincenzo.frascino@arm.com> + Catalin Marinas <catalin.marinas@arm.com> + +Date: 2020-02-25 + +This document describes the provision of the Memory Tagging Extension +functionality in AArch64 Linux. + +Introduction +============ + +ARMv8.5 based processors introduce the Memory Tagging Extension (MTE) +feature. MTE is built on top of the ARMv8.0 virtual address tagging TBI +(Top Byte Ignore) feature and allows software to access a 4-bit +allocation tag for each 16-byte granule in the physical address space. +Such memory range must be mapped with the Normal-Tagged memory +attribute. A logical tag is derived from bits 59-56 of the virtual +address used for the memory access. A CPU with MTE enabled will compare +the logical tag against the allocation tag and potentially raise an +exception on mismatch, subject to system registers configuration. + +Userspace Support +================= + +When ``CONFIG_ARM64_MTE`` is selected and Memory Tagging Extension is +supported by the hardware, the kernel advertises the feature to +userspace via ``HWCAP2_MTE``. + +PROT_MTE +-------- + +To access the allocation tags, a user process must enable the Tagged +memory attribute on an address range using a new ``prot`` flag for +``mmap()`` and ``mprotect()``: + +``PROT_MTE`` - Pages allow access to the MTE allocation tags. + +The allocation tag is set to 0 when such pages are first mapped in the +user address space and preserved on copy-on-write. ``MAP_SHARED`` is +supported and the allocation tags can be shared between processes. + +**Note**: ``PROT_MTE`` is only supported on ``MAP_ANONYMOUS`` and +RAM-based file mappings (``tmpfs``, ``memfd``). Passing it to other +types of mapping will result in ``-EINVAL`` returned by these system +calls. + +**Note**: The ``PROT_MTE`` flag (and corresponding memory type) cannot +be cleared by ``mprotect()``. + +**Note**: ``madvise()`` memory ranges with ``MADV_DONTNEED`` and +``MADV_FREE`` may have the allocation tags cleared (set to 0) at any +point after the system call. + +Tag Check Faults +---------------- + +When ``PROT_MTE`` is enabled on an address range and a mismatch between +the logical and allocation tags occurs on access, there are three +configurable behaviours: + +- *Ignore* - This is the default mode. The CPU (and kernel) ignores the + tag check fault. + +- *Synchronous* - The kernel raises a ``SIGSEGV`` synchronously, with + ``.si_code = SEGV_MTESERR`` and ``.si_addr = <fault-address>``. The + memory access is not performed. If ``SIGSEGV`` is ignored or blocked + by the offending thread, the containing process is terminated with a + ``coredump``. + +- *Asynchronous* - The kernel raises a ``SIGSEGV``, in the offending + thread, asynchronously following one or multiple tag check faults, + with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting + address is unknown). + +The user can select the above modes, per thread, using the +``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where +``flags`` contain one of the following values in the ``PR_MTE_TCF_MASK`` +bit-field: + +- ``PR_MTE_TCF_NONE`` - *Ignore* tag check faults +- ``PR_MTE_TCF_SYNC`` - *Synchronous* tag check fault mode +- ``PR_MTE_TCF_ASYNC`` - *Asynchronous* tag check fault mode + +The current tag check fault mode can be read using the +``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. + +Tag checking can also be disabled for a user thread by setting the +``PSTATE.TCO`` bit with ``MSR TCO, #1``. + +**Note**: Signal handlers are always invoked with ``PSTATE.TCO = 0``, +irrespective of the interrupted context. ``PSTATE.TCO`` is restored on +``sigreturn()``. + +**Note**: There are no *match-all* logical tags available for user +applications. + +**Note**: Kernel accesses to the user address space (e.g. ``read()`` +system call) are not checked if the user thread tag checking mode is +``PR_MTE_TCF_NONE`` or ``PR_MTE_TCF_ASYNC``. If the tag checking mode is +``PR_MTE_TCF_SYNC``, the kernel makes a best effort to check its user +address accesses, however it cannot always guarantee it. + +Excluding Tags in the ``IRG``, ``ADDG`` and ``SUBG`` instructions +----------------------------------------------------------------- + +The architecture allows excluding certain tags to be randomly generated +via the ``GCR_EL1.Exclude`` register bit-field. By default, Linux +excludes all tags other than 0. A user thread can enable specific tags +in the randomly generated set using the ``prctl(PR_SET_TAGGED_ADDR_CTRL, +flags, 0, 0, 0)`` system call where ``flags`` contains the tags bitmap +in the ``PR_MTE_TAG_MASK`` bit-field. + +**Note**: The hardware uses an exclude mask but the ``prctl()`` +interface provides an include mask. An include mask of ``0`` (exclusion +mask ``0xffff``) results in the CPU always generating tag ``0``. + +Initial process state +--------------------- + +On ``execve()``, the new process has the following configuration: + +- ``PR_TAGGED_ADDR_ENABLE`` set to 0 (disabled) +- Tag checking mode set to ``PR_MTE_TCF_NONE`` +- ``PR_MTE_TAG_MASK`` set to 0 (all tags excluded) +- ``PSTATE.TCO`` set to 0 +- ``PROT_MTE`` not set on any of the initial memory maps + +On ``fork()``, the new process inherits the parent's configuration and +memory map attributes with the exception of the ``madvise()`` ranges +with ``MADV_WIPEONFORK`` which will have the data and tags cleared (set +to 0). + +The ``ptrace()`` interface +-------------------------- + +``PTRACE_PEEKMTETAGS`` and ``PTRACE_POKEMTETAGS`` allow a tracer to read +the tags from or set the tags to a tracee's address space. The +``ptrace()`` system call is invoked as ``ptrace(request, pid, addr, +data)`` where: + +- ``request`` - one of ``PTRACE_PEEKMTETAGS`` or ``PTRACE_POKEMTETAGS``. +- ``pid`` - the tracee's PID. +- ``addr`` - address in the tracee's address space. +- ``data`` - pointer to a ``struct iovec`` where ``iov_base`` points to + a buffer of ``iov_len`` length in the tracer's address space. + +The tags in the tracer's ``iov_base`` buffer are represented as one +4-bit tag per byte and correspond to a 16-byte MTE tag granule in the +tracee's address space. + +**Note**: If ``addr`` is not aligned to a 16-byte granule, the kernel +will use the corresponding aligned address. + +``ptrace()`` return value: + +- 0 - tags were copied, the tracer's ``iov_len`` was updated to the + number of tags transferred. This may be smaller than the requested + ``iov_len`` if the requested address range in the tracee's or the + tracer's space cannot be accessed or does not have valid tags. +- ``-EPERM`` - the specified process cannot be traced. +- ``-EIO`` - the tracee's address range cannot be accessed (e.g. invalid + address) and no tags copied. ``iov_len`` not updated. +- ``-EFAULT`` - fault on accessing the tracer's memory (``struct iovec`` + or ``iov_base`` buffer) and no tags copied. ``iov_len`` not updated. +- ``-EOPNOTSUPP`` - the tracee's address does not have valid tags (never + mapped with the ``PROT_MTE`` flag). ``iov_len`` not updated. + +**Note**: There are no transient errors for the requests above, so user +programs should not retry in case of a non-zero system call return. + +``PTRACE_GETREGSET`` and ``PTRACE_SETREGSET`` with ``addr == +``NT_ARM_TAGGED_ADDR_CTRL`` allow ``ptrace()`` access to the tagged +address ABI control and MTE configuration of a process as per the +``prctl()`` options described in +Documentation/arm64/tagged-address-abi.rst and above. The corresponding +``regset`` is 1 element of 8 bytes (``sizeof(long))``). + +Example of correct usage +======================== + +*MTE Example code* + +.. code-block:: c + + /* + * To be compiled with -march=armv8.5-a+memtag + */ + #include <errno.h> + #include <stdint.h> + #include <stdio.h> + #include <stdlib.h> + #include <unistd.h> + #include <sys/auxv.h> + #include <sys/mman.h> + #include <sys/prctl.h> + + /* + * From arch/arm64/include/uapi/asm/hwcap.h + */ + #define HWCAP2_MTE (1 << 18) + + /* + * From arch/arm64/include/uapi/asm/mman.h + */ + #define PROT_MTE 0x20 + + /* + * From include/uapi/linux/prctl.h + */ + #define PR_SET_TAGGED_ADDR_CTRL 55 + #define PR_GET_TAGGED_ADDR_CTRL 56 + # define PR_TAGGED_ADDR_ENABLE (1UL << 0) + # define PR_MTE_TCF_SHIFT 1 + # define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) + # define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) + # define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) + # define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) + # define PR_MTE_TAG_SHIFT 3 + # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) + + /* + * Insert a random logical tag into the given pointer. + */ + #define insert_random_tag(ptr) ({ \ + uint64_t __val; \ + asm("irg %0, %1" : "=r" (__val) : "r" (ptr)); \ + __val; \ + }) + + /* + * Set the allocation tag on the destination address. + */ + #define set_tag(tagged_addr) do { \ + asm volatile("stg %0, [%0]" : : "r" (tagged_addr) : "memory"); \ + } while (0) + + int main() + { + unsigned char *a; + unsigned long page_sz = sysconf(_SC_PAGESIZE); + unsigned long hwcap2 = getauxval(AT_HWCAP2); + + /* check if MTE is present */ + if (!(hwcap2 & HWCAP2_MTE)) + return EXIT_FAILURE; + + /* + * Enable the tagged address ABI, synchronous MTE tag check faults and + * allow all non-zero tags in the randomly generated set. + */ + if (prctl(PR_SET_TAGGED_ADDR_CTRL, + PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC | (0xfffe << PR_MTE_TAG_SHIFT), + 0, 0, 0)) { + perror("prctl() failed"); + return EXIT_FAILURE; + } + + a = mmap(0, page_sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (a == MAP_FAILED) { + perror("mmap() failed"); + return EXIT_FAILURE; + } + + /* + * Enable MTE on the above anonymous mmap. The flag could be passed + * directly to mmap() and skip this step. + */ + if (mprotect(a, page_sz, PROT_READ | PROT_WRITE | PROT_MTE)) { + perror("mprotect() failed"); + return EXIT_FAILURE; + } + + /* access with the default tag (0) */ + a[0] = 1; + a[1] = 2; + + printf("a[0] = %hhu a[1] = %hhu\n", a[0], a[1]); + + /* set the logical and allocation tags */ + a = (unsigned char *)insert_random_tag(a); + set_tag(a); + + printf("%p\n", a); + + /* non-zero tag access */ + a[0] = 3; + printf("a[0] = %hhu a[1] = %hhu\n", a[0], a[1]); + + /* + * If MTE is enabled correctly the next instruction will generate an + * exception. + */ + printf("Expecting SIGSEGV...\n"); + a[16] = 0xdd; + + /* this should not be printed in the PR_MTE_TCF_SYNC mode */ + printf("...haven't got one\n"); + + return EXIT_FAILURE; + } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e7f804887dc6..43091f439e4e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1645,6 +1645,39 @@ config ARCH_RANDOM provides a high bandwidth, cryptographically secure hardware random number generator. +config ARM64_AS_HAS_MTE + # Initial support for MTE went in binutils 2.32.0, checked with + # ".arch armv8.5-a+memtag" below. However, this was incomplete + # as a late addition to the final architecture spec (LDGM/STGM) + # is only supported in the newer 2.32.x and 2.33 binutils + # versions, hence the extra "stgm" instruction check below. + def_bool $(as-instr,.arch armv8.5-a+memtag\nstgm xzr$(comma)[x0]) + +config ARM64_MTE + bool "Memory Tagging Extension support" + default y + depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI + select ARCH_USES_HIGH_VMA_FLAGS + help + Memory Tagging (part of the ARMv8.5 Extensions) provides + architectural support for run-time, always-on detection of + various classes of memory error to aid with software debugging + to eliminate vulnerabilities arising from memory-unsafe + languages. + + This option enables the support for the Memory Tagging + Extension at EL0 (i.e. for userspace). + + Selecting this option allows the feature to be detected at + runtime. Any secondary CPU not implementing this feature will + not be allowed a late bring-up. + + Userspace binaries that want to use this feature must + explicitly opt in. The mechanism for the userspace is + described in: + + Documentation/arm64/memory-tagging-extension.rst. + endmenu config ARM64_SVE diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index c4ac9a13ad5f..42868dbd29fd 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -64,7 +64,8 @@ #define ARM64_BTI 54 #define ARM64_HAS_ARMv8_4_TTL 55 #define ARM64_HAS_TLB_RANGE 56 +#define ARM64_MTE 57 -#define ARM64_NCAPS 57 +#define ARM64_NCAPS 58 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 4099b05372c9..f7e7144af174 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -681,6 +681,12 @@ static __always_inline bool system_uses_irq_prio_masking(void) cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); } +static inline bool system_supports_mte(void) +{ + return IS_ENABLED(CONFIG_ARM64_MTE) && + cpus_have_const_cap(ARM64_MTE); +} + static inline bool system_has_prio_mask_debugging(void) { return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) && diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6493a4c63a2f..9a5498c2c8ee 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -104,7 +104,7 @@ #define KERNEL_HWCAP_DGH __khwcap2_feature(DGH) #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) #define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) -/* reserved for KERNEL_HWCAP_MTE __khwcap2_feature(MTE) */ +#define KERNEL_HWCAP_MTE __khwcap2_feature(MTE) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 32acf95d49c7..64ce29378467 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -12,6 +12,7 @@ #include <asm/types.h> /* Hyp Configuration Register (HCR) bits */ +#define HCR_ATA (UL(1) << 56) #define HCR_FWB (UL(1) << 46) #define HCR_API (UL(1) << 41) #define HCR_APK (UL(1) << 40) @@ -78,7 +79,7 @@ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ HCR_FMO | HCR_IMO | HCR_PTW ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) -#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) +#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 20a9b322d342..43640d797455 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -126,13 +126,18 @@ /* * Memory types available. + * + * IMPORTANT: MT_NORMAL must be index 0 since vm_get_page_prot() may 'or' in + * the MT_NORMAL_TAGGED memory type for PROT_MTE mappings. Note + * that protection_map[] only contains MT_NORMAL attributes. */ -#define MT_DEVICE_nGnRnE 0 -#define MT_DEVICE_nGnRE 1 -#define MT_DEVICE_GRE 2 -#define MT_NORMAL_NC 3 -#define MT_NORMAL 4 -#define MT_NORMAL_WT 5 +#define MT_NORMAL 0 +#define MT_NORMAL_TAGGED 1 +#define MT_NORMAL_NC 2 +#define MT_NORMAL_WT 3 +#define MT_DEVICE_nGnRnE 4 +#define MT_DEVICE_nGnRE 5 +#define MT_DEVICE_GRE 6 /* * Memory types for Stage-2 translation diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 081ec8de9ea6..e3e28f7daf62 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -9,16 +9,53 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey __always_unused) { + unsigned long ret = 0; + if (system_supports_bti() && (prot & PROT_BTI)) - return VM_ARM64_BTI; + ret |= VM_ARM64_BTI; - return 0; + if (system_supports_mte() && (prot & PROT_MTE)) + ret |= VM_MTE; + + return ret; } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) +static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) +{ + /* + * Only allow MTE on anonymous mappings as these are guaranteed to be + * backed by tags-capable memory. The vm_flags may be overridden by a + * filesystem supporting MTE (RAM-based). + */ + if (system_supports_mte() && (flags & MAP_ANONYMOUS)) + return VM_MTE_ALLOWED; + + return 0; +} +#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags) + static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) { - return (vm_flags & VM_ARM64_BTI) ? __pgprot(PTE_GP) : __pgprot(0); + pteval_t prot = 0; + + if (vm_flags & VM_ARM64_BTI) + prot |= PTE_GP; + + /* + * There are two conditions required for returning a Normal Tagged + * memory type: (1) the user requested it via PROT_MTE passed to + * mmap() or mprotect() and (2) the corresponding vma supports MTE. We + * register (1) as VM_MTE in the vma->vm_flags and (2) as + * VM_MTE_ALLOWED. Note that the latter can only be set during the + * mmap() call since mprotect() does not accept MAP_* flags. + * Checking for VM_MTE only is sufficient since arch_validate_flags() + * does not permit (VM_MTE & !VM_MTE_ALLOWED). + */ + if (vm_flags & VM_MTE) + prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED); + + return __pgprot(prot); } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) @@ -30,8 +67,21 @@ static inline bool arch_validate_prot(unsigned long prot, if (system_supports_bti()) supported |= PROT_BTI; + if (system_supports_mte()) + supported |= PROT_MTE; + return (prot & ~supported) == 0; } #define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr) +static inline bool arch_validate_flags(unsigned long vm_flags) +{ + if (!system_supports_mte()) + return true; + + /* only allow VM_MTE if VM_MTE_ALLOWED has been set previously */ + return !(vm_flags & VM_MTE) || (vm_flags & VM_MTE_ALLOWED); +} +#define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags) + #endif /* ! __ASM_MMAN_H__ */ diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h new file mode 100644 index 000000000000..1c99fcadb58c --- /dev/null +++ b/arch/arm64/include/asm/mte.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 ARM Ltd. + */ +#ifndef __ASM_MTE_H +#define __ASM_MTE_H + +#define MTE_GRANULE_SIZE UL(16) +#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1)) +#define MTE_TAG_SHIFT 56 +#define MTE_TAG_SIZE 4 + +#ifndef __ASSEMBLY__ + +#include <linux/page-flags.h> + +#include <asm/pgtable-types.h> + +void mte_clear_page_tags(void *addr); +unsigned long mte_copy_tags_from_user(void *to, const void __user *from, + unsigned long n); +unsigned long mte_copy_tags_to_user(void __user *to, void *from, + unsigned long n); +int mte_save_tags(struct page *page); +void mte_save_page_tags(const void *page_addr, void *tag_storage); +bool mte_restore_tags(swp_entry_t entry, struct page *page); +void mte_restore_page_tags(void *page_addr, const void *tag_storage); +void mte_invalidate_tags(int type, pgoff_t offset); +void mte_invalidate_tags_area(int type); +void *mte_allocate_tag_storage(void); +void mte_free_tag_storage(char *storage); + +#ifdef CONFIG_ARM64_MTE + +/* track which pages have valid allocation tags */ +#define PG_mte_tagged PG_arch_2 + +void mte_sync_tags(pte_t *ptep, pte_t pte); +void mte_copy_page_tags(void *kto, const void *kfrom); +void flush_mte_state(void); +void mte_thread_switch(struct task_struct *next); +void mte_suspend_exit(void); +long set_mte_ctrl(struct task_struct *task, unsigned long arg); +long get_mte_ctrl(struct task_struct *task); +int mte_ptrace_copy_tags(struct task_struct *child, long request, + unsigned long addr, unsigned long data); + +#else + +/* unused if !CONFIG_ARM64_MTE, silence the compiler */ +#define PG_mte_tagged 0 + +static inline void mte_sync_tags(pte_t *ptep, pte_t pte) +{ +} +static inline void mte_copy_page_tags(void *kto, const void *kfrom) +{ +} +static inline void flush_mte_state(void) +{ +} +static inline void mte_thread_switch(struct task_struct *next) +{ +} +static inline void mte_suspend_exit(void) +{ +} +static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg) +{ + return 0; +} +static inline long get_mte_ctrl(struct task_struct *task) +{ + return 0; +} +static inline int mte_ptrace_copy_tags(struct task_struct *child, + long request, unsigned long addr, + unsigned long data) +{ + return -EIO; +} + +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_MTE_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index c01b52add377..012cffc574e8 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -15,18 +15,25 @@ #include <linux/personality.h> /* for READ_IMPLIES_EXEC */ #include <asm/pgtable-types.h> -extern void __cpu_clear_user_page(void *p, unsigned long user); -extern void __cpu_copy_user_page(void *to, const void *from, - unsigned long user); +struct page; +struct vm_area_struct; + extern void copy_page(void *to, const void *from); extern void clear_page(void *to); +void copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma); +#define __HAVE_ARCH_COPY_USER_HIGHPAGE + +void copy_highpage(struct page *to, struct page *from); +#define __HAVE_ARCH_COPY_HIGHPAGE + #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE -#define clear_user_page(addr,vaddr,pg) __cpu_clear_user_page(addr, vaddr) -#define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr) +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) typedef struct page *pgtable_t; @@ -36,7 +43,7 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC +#define VM_DATA_DEFAULT_FLAGS (VM_DATA_FLAGS_TSK_EXEC | VM_MTE_ALLOWED) #include <asm-generic/getorder.h> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 2df4b75fce3c..4cd0d6ca8aa1 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -57,6 +57,7 @@ extern bool arm64_use_ng_mappings; #define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) #define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) #define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) @@ -66,6 +67,7 @@ extern bool arm64_use_ng_mappings; #define _HYP_PAGE_DEFAULT _PAGE_DEFAULT #define PAGE_KERNEL __pgprot(PROT_NORMAL) +#define PAGE_KERNEL_TAGGED __pgprot(PROT_NORMAL_TAGGED) #define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) #define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 80f4278f7183..a11bf52e0c38 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -9,6 +9,7 @@ #include <asm/proc-fns.h> #include <asm/memory.h> +#include <asm/mte.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable-prot.h> #include <asm/tlbflush.h> @@ -94,6 +95,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) #define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP)) +#define pte_tagged(pte) ((pte_val(pte) & PTE_ATTRINDX_MASK) == \ + PTE_ATTRINDX(MT_NORMAL_TAGGED)) #define pte_cont_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK; \ @@ -300,6 +303,10 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte); + if (system_supports_mte() && + pte_present(pte) && pte_tagged(pte) && !pte_special(pte)) + mte_sync_tags(ptep, pte); + __check_racy_pte_update(mm, ptep, pte); set_pte(ptep, pte); @@ -709,8 +716,13 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { + /* + * Normal and Normal-Tagged are two different memory types and indices + * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK. + */ const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP | + PTE_ATTRINDX_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) pte = pte_mkdirty(pte); @@ -895,6 +907,38 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, extern int kern_addr_valid(unsigned long addr); +#ifdef CONFIG_ARM64_MTE + +#define __HAVE_ARCH_PREPARE_TO_SWAP +static inline int arch_prepare_to_swap(struct page *page) +{ + if (system_supports_mte()) + return mte_save_tags(page); + return 0; +} + +#define __HAVE_ARCH_SWAP_INVALIDATE +static inline void arch_swap_invalidate_page(int type, pgoff_t offset) +{ + if (system_supports_mte()) + mte_invalidate_tags(type, offset); +} + +static inline void arch_swap_invalidate_area(int type) +{ + if (system_supports_mte()) + mte_invalidate_tags_area(type); +} + +#define __HAVE_ARCH_SWAP_RESTORE +static inline void arch_swap_restore(swp_entry_t entry, struct page *page) +{ + if (system_supports_mte() && mte_restore_tags(entry, page)) + set_bit(PG_mte_tagged, &page->flags); +} + +#endif /* CONFIG_ARM64_MTE */ + /* * On AArch64, the cache coherency is handled via the set_pte_at() function. */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 7d90ea2e2063..fce8cbecd6bc 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -152,6 +152,10 @@ struct thread_struct { struct ptrauth_keys_user keys_user; struct ptrauth_keys_kernel keys_kernel; #endif +#ifdef CONFIG_ARM64_MTE + u64 sctlr_tcf0; + u64 gcr_user_incl; +#endif }; static inline void arch_thread_struct_whitelist(unsigned long *offset, @@ -301,10 +305,10 @@ extern void __init minsigstksz_setup(void); #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI /* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */ -long set_tagged_addr_ctrl(unsigned long arg); -long get_tagged_addr_ctrl(void); -#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(arg) -#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl() +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg); +long get_tagged_addr_ctrl(struct task_struct *task); +#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(current, arg) +#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current) #endif /* diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6beef073871d..d52c1b3ce589 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -91,10 +91,12 @@ #define PSTATE_PAN pstate_field(0, 4) #define PSTATE_UAO pstate_field(0, 3) #define PSTATE_SSBS pstate_field(3, 1) +#define PSTATE_TCO pstate_field(3, 4) #define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) #define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) #define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_TCO(x) __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) << PSTATE_Imm_shift)) #define __SYS_BARRIER_INSN(CRm, op2, Rt) \ __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f)) @@ -181,6 +183,8 @@ #define SYS_SCTLR_EL1 sys_reg(3, 0, 1, 0, 0) #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) #define SYS_CPACR_EL1 sys_reg(3, 0, 1, 0, 2) +#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) +#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) #define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) @@ -218,6 +222,8 @@ #define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3) #define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0) #define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1) +#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0) +#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1) #define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0) #define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) @@ -370,6 +376,7 @@ #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) +#define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) #define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) @@ -462,6 +469,7 @@ #define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0) #define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3) #define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) +#define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0) #define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0) #define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) @@ -518,6 +526,7 @@ #define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) #define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) #define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) +#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) #define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) @@ -533,6 +542,15 @@ /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_DSSBS (BIT(44)) +#define SCTLR_ELx_ATA (BIT(43)) + +#define SCTLR_ELx_TCF_SHIFT 40 +#define SCTLR_ELx_TCF_NONE (UL(0x0) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_SYNC (UL(0x1) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT) + +#define SCTLR_ELx_ITFSB (BIT(37)) #define SCTLR_ELx_ENIA (BIT(31)) #define SCTLR_ELx_ENIB (BIT(30)) #define SCTLR_ELx_ENDA (BIT(27)) @@ -561,6 +579,14 @@ #endif /* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_ATA0 (BIT(42)) + +#define SCTLR_EL1_TCF0_SHIFT 38 +#define SCTLR_EL1_TCF0_NONE (UL(0x0) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_SYNC (UL(0x1) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) + #define SCTLR_EL1_BT1 (BIT(36)) #define SCTLR_EL1_BT0 (BIT(35)) #define SCTLR_EL1_UCI (BIT(26)) @@ -589,6 +615,7 @@ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ SCTLR_EL1_DZE | SCTLR_EL1_UCT |\ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ + SCTLR_ELx_ITFSB| SCTLR_ELx_ATA | SCTLR_EL1_ATA0 |\ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) /* MAIR_ELx memory attributes (used by Linux) */ @@ -597,6 +624,7 @@ #define MAIR_ATTR_DEVICE_GRE UL(0x0c) #define MAIR_ATTR_NORMAL_NC UL(0x44) #define MAIR_ATTR_NORMAL_WT UL(0xbb) +#define MAIR_ATTR_NORMAL_TAGGED UL(0xf0) #define MAIR_ATTR_NORMAL UL(0xff) #define MAIR_ATTR_MASK UL(0xff) @@ -696,6 +724,10 @@ #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 #define ID_AA64PFR1_BT_BTI 0x1 +#define ID_AA64PFR1_MTE_NI 0x0 +#define ID_AA64PFR1_MTE_EL0 0x1 +#define ID_AA64PFR1_MTE 0x2 + /* id_aa64zfr0 */ #define ID_AA64ZFR0_F64MM_SHIFT 56 #define ID_AA64ZFR0_F32MM_SHIFT 52 @@ -930,6 +962,28 @@ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ #define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) +/* TCR EL1 Bit Definitions */ +#define SYS_TCR_EL1_TCMA1 (BIT(58)) +#define SYS_TCR_EL1_TCMA0 (BIT(57)) + +/* GCR_EL1 Definitions */ +#define SYS_GCR_EL1_RRND (BIT(16)) +#define SYS_GCR_EL1_EXCL_MASK 0xffffUL + +/* RGSR_EL1 Definitions */ +#define SYS_RGSR_EL1_TAG_MASK 0xfUL +#define SYS_RGSR_EL1_SEED_SHIFT 8 +#define SYS_RGSR_EL1_SEED_MASK 0xffffUL + +/* GMID_EL1 field definitions */ +#define SYS_GMID_EL1_BS_SHIFT 0 +#define SYS_GMID_EL1_BS_SIZE 4 + +/* TFSR{,E0}_EL1 bit definitions */ +#define SYS_TFSR_EL1_TF0_SHIFT 0 +#define SYS_TFSR_EL1_TF1_SHIFT 1 +#define SYS_TFSR_EL1_TF0 (UL(1) << SYS_TFSR_EL1_TF0_SHIFT) +#define SYS_TFSR_EL1_TF1 (UK(2) << SYS_TFSR_EL1_TF1_SHIFT) /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ #define SYS_MPIDR_SAFE_VAL (BIT(31)) @@ -1034,6 +1088,13 @@ write_sysreg(__scs_new, sysreg); \ } while (0) +#define sysreg_clear_set_s(sysreg, clear, set) do { \ + u64 __scs_val = read_sysreg_s(sysreg); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + write_sysreg_s(__scs_new, sysreg); \ +} while (0) + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 5e784e16ee89..1fbab854a51b 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -67,6 +67,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_FSCHECK 5 /* Check FS is USER_DS on return */ +#define TIF_MTE_ASYNC_FAULT 6 /* MTE Asynchronous Tag Check Fault */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ @@ -96,10 +97,11 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) +#define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ - _TIF_UPROBE | _TIF_FSCHECK) + _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 912162f73529..b8f41aa234ee 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -74,6 +74,6 @@ #define HWCAP2_DGH (1 << 15) #define HWCAP2_RNG (1 << 16) #define HWCAP2_BTI (1 << 17) -/* reserved for HWCAP2_MTE (1 << 18) */ +#define HWCAP2_MTE (1 << 18) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/mman.h b/arch/arm64/include/uapi/asm/mman.h index 6fdd71eb644f..1e6482a838e1 100644 --- a/arch/arm64/include/uapi/asm/mman.h +++ b/arch/arm64/include/uapi/asm/mman.h @@ -5,5 +5,6 @@ #include <asm-generic/mman.h> #define PROT_BTI 0x10 /* BTI guarded page */ +#define PROT_MTE 0x20 /* Normal Tagged mapping */ #endif /* ! _UAPI__ASM_MMAN_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 42cbe34d95ce..758ae984ff97 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -51,6 +51,7 @@ #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 #define PSR_DIT_BIT 0x01000000 +#define PSR_TCO_BIT 0x02000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 #define PSR_Z_BIT 0x40000000 @@ -75,6 +76,9 @@ /* syscall emulation path in ptrace */ #define PTRACE_SYSEMU 31 #define PTRACE_SYSEMU_SINGLESTEP 32 +/* MTE allocation tag access */ +#define PTRACE_PEEKMTETAGS 33 +#define PTRACE_POKEMTETAGS 34 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 88f160afc468..bbaf0bc4ad60 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o +obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso/ probes/ obj-$(CONFIG_COMPAT_VDSO) += vdso32/ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 79207c323553..dcc165b3fc04 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -75,6 +75,7 @@ #include <asm/cpu_ops.h> #include <asm/fpsimd.h> #include <asm/mmu_context.h> +#include <asm/mte.h> #include <asm/processor.h> #include <asm/sysreg.h> #include <asm/traps.h> @@ -227,6 +228,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MTE_SHIFT, 4, ID_AA64PFR1_MTE_NI), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0), @@ -1688,6 +1691,22 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) } #endif /* CONFIG_ARM64_BTI */ +#ifdef CONFIG_ARM64_MTE +static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) +{ + static bool cleared_zero_page = false; + + /* + * Clear the tags in the zero page. This needs to be done via the + * linear map which has the Tagged attribute. + */ + if (!cleared_zero_page) { + cleared_zero_page = true; + mte_clear_page_tags(lm_alias(empty_zero_page)); + } +} +#endif /* CONFIG_ARM64_MTE */ + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -2104,6 +2123,19 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, }, #endif +#ifdef CONFIG_ARM64_MTE + { + .desc = "Memory Tagging Extension", + .capability = ARM64_MTE, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_MTE_SHIFT, + .min_field_value = ID_AA64PFR1_MTE, + .sign = FTR_UNSIGNED, + .cpu_enable = cpu_enable_mte, + }, +#endif /* CONFIG_ARM64_MTE */ {}, }; @@ -2220,6 +2252,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif +#ifdef CONFIG_ARM64_MTE + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), +#endif /* CONFIG_ARM64_MTE */ {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 25113245825c..6a7bb3729d60 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -93,6 +93,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_DGH] = "dgh", [KERNEL_HWCAP_RNG] = "rng", [KERNEL_HWCAP_BTI] = "bti", + [KERNEL_HWCAP_MTE] = "mte", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index aeb337029d56..f30007dff35f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -147,6 +147,32 @@ alternative_cb_end .L__asm_ssbd_skip\@: .endm + /* Check for MTE asynchronous tag check faults */ + .macro check_mte_async_tcf, flgs, tmp +#ifdef CONFIG_ARM64_MTE +alternative_if_not ARM64_MTE + b 1f +alternative_else_nop_endif + mrs_s \tmp, SYS_TFSRE0_EL1 + tbz \tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f + /* Asynchronous TCF occurred for TTBR0 access, set the TI flag */ + orr \flgs, \flgs, #_TIF_MTE_ASYNC_FAULT + str \flgs, [tsk, #TSK_TI_FLAGS] + msr_s SYS_TFSRE0_EL1, xzr +1: +#endif + .endm + + /* Clear the MTE asynchronous tag check faults */ + .macro clear_mte_async_tcf +#ifdef CONFIG_ARM64_MTE +alternative_if ARM64_MTE + dsb ish + msr_s SYS_TFSRE0_EL1, xzr +alternative_else_nop_endif +#endif + .endm + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 @@ -180,6 +206,8 @@ alternative_cb_end ldr x19, [tsk, #TSK_TI_FLAGS] disable_step_tsk x19, x20 + /* Check for asynchronous tag check faults in user space */ + check_mte_async_tcf x19, x22 apply_ssbd 1, x22, x23 ptrauth_keys_install_kernel tsk, x20, x22, x23 @@ -231,6 +259,13 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING str x20, [sp, #S_PMR_SAVE] alternative_else_nop_endif + /* Re-enable tag checking (TCO set on exception entry) */ +#ifdef CONFIG_ARM64_MTE +alternative_if ARM64_MTE + SET_PSTATE_TCO(0) +alternative_else_nop_endif +#endif + /* * Registers that may be useful after this macro is invoked: * @@ -740,6 +775,8 @@ SYM_CODE_START_LOCAL(ret_to_user) and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: + /* Ignore asynchronous tag check faults in the uaccess routines */ + clear_mte_async_tcf enable_step_tsk x1, x2 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK bl stackleak_erase diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 3a1662392c95..42003774d261 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -30,6 +30,7 @@ #include <asm/kexec.h> #include <asm/memory.h> #include <asm/mmu_context.h> +#include <asm/mte.h> #include <asm/pgalloc.h> #include <asm/pgtable-hwdef.h> #include <asm/sections.h> @@ -284,6 +285,117 @@ static int create_safe_exec_page(void *src_start, size_t length, #define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) +#ifdef CONFIG_ARM64_MTE + +static DEFINE_XARRAY(mte_pages); + +static int save_tags(struct page *page, unsigned long pfn) +{ + void *tag_storage, *ret; + + tag_storage = mte_allocate_tag_storage(); + if (!tag_storage) + return -ENOMEM; + + mte_save_page_tags(page_address(page), tag_storage); + + ret = xa_store(&mte_pages, pfn, tag_storage, GFP_KERNEL); + if (WARN(xa_is_err(ret), "Failed to store MTE tags")) { + mte_free_tag_storage(tag_storage); + return xa_err(ret); + } else if (WARN(ret, "swsusp: %s: Duplicate entry", __func__)) { + mte_free_tag_storage(ret); + } + + return 0; +} + +static void swsusp_mte_free_storage(void) +{ + XA_STATE(xa_state, &mte_pages, 0); + void *tags; + + xa_lock(&mte_pages); + xas_for_each(&xa_state, tags, ULONG_MAX) { + mte_free_tag_storage(tags); + } + xa_unlock(&mte_pages); + + xa_destroy(&mte_pages); +} + +static int swsusp_mte_save_tags(void) +{ + struct zone *zone; + unsigned long pfn, max_zone_pfn; + int ret = 0; + int n = 0; + + if (!system_supports_mte()) + return 0; + + for_each_populated_zone(zone) { + max_zone_pfn = zone_end_pfn(zone); + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) { + struct page *page = pfn_to_online_page(pfn); + + if (!page) + continue; + + if (!test_bit(PG_mte_tagged, &page->flags)) + continue; + + ret = save_tags(page, pfn); + if (ret) { + swsusp_mte_free_storage(); + goto out; + } + + n++; + } + } + pr_info("Saved %d MTE pages\n", n); + +out: + return ret; +} + +static void swsusp_mte_restore_tags(void) +{ + XA_STATE(xa_state, &mte_pages, 0); + int n = 0; + void *tags; + + xa_lock(&mte_pages); + xas_for_each(&xa_state, tags, ULONG_MAX) { + unsigned long pfn = xa_state.xa_index; + struct page *page = pfn_to_online_page(pfn); + + mte_restore_page_tags(page_address(page), tags); + + mte_free_tag_storage(tags); + n++; + } + xa_unlock(&mte_pages); + + pr_info("Restored %d MTE pages\n", n); + + xa_destroy(&mte_pages); +} + +#else /* CONFIG_ARM64_MTE */ + +static int swsusp_mte_save_tags(void) +{ + return 0; +} + +static void swsusp_mte_restore_tags(void) +{ +} + +#endif /* CONFIG_ARM64_MTE */ + int swsusp_arch_suspend(void) { int ret = 0; @@ -301,6 +413,10 @@ int swsusp_arch_suspend(void) /* make the crash dump kernel image visible/saveable */ crash_prepare_suspend(); + ret = swsusp_mte_save_tags(); + if (ret) + return ret; + sleep_cpu = smp_processor_id(); ret = swsusp_save(); } else { @@ -314,6 +430,8 @@ int swsusp_arch_suspend(void) dcache_clean_range(__hyp_text_start, __hyp_text_end); } + swsusp_mte_restore_tags(); + /* make the crash dump kernel image protected again */ crash_post_resume(); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c new file mode 100644 index 000000000000..52a0638ed967 --- /dev/null +++ b/arch/arm64/kernel/mte.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 ARM Ltd. + */ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/prctl.h> +#include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/string.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/thread_info.h> +#include <linux/uio.h> + +#include <asm/cpufeature.h> +#include <asm/mte.h> +#include <asm/ptrace.h> +#include <asm/sysreg.h> + +static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) +{ + pte_t old_pte = READ_ONCE(*ptep); + + if (check_swap && is_swap_pte(old_pte)) { + swp_entry_t entry = pte_to_swp_entry(old_pte); + + if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) + return; + } + + mte_clear_page_tags(page_address(page)); +} + +void mte_sync_tags(pte_t *ptep, pte_t pte) +{ + struct page *page = pte_page(pte); + long i, nr_pages = compound_nr(page); + bool check_swap = nr_pages == 1; + + /* if PG_mte_tagged is set, tags have already been initialised */ + for (i = 0; i < nr_pages; i++, page++) { + if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + mte_sync_page_tags(page, ptep, check_swap); + } +} + +int memcmp_pages(struct page *page1, struct page *page2) +{ + char *addr1, *addr2; + int ret; + + addr1 = page_address(page1); + addr2 = page_address(page2); + ret = memcmp(addr1, addr2, PAGE_SIZE); + + if (!system_supports_mte() || ret) + return ret; + + /* + * If the page content is identical but at least one of the pages is + * tagged, return non-zero to avoid KSM merging. If only one of the + * pages is tagged, set_pte_at() may zero or change the tags of the + * other page via mte_sync_tags(). + */ + if (test_bit(PG_mte_tagged, &page1->flags) || + test_bit(PG_mte_tagged, &page2->flags)) + return addr1 != addr2; + + return ret; +} + +static void update_sctlr_el1_tcf0(u64 tcf0) +{ + /* ISB required for the kernel uaccess routines */ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0); + isb(); +} + +static void set_sctlr_el1_tcf0(u64 tcf0) +{ + /* + * mte_thread_switch() checks current->thread.sctlr_tcf0 as an + * optimisation. Disable preemption so that it does not see + * the variable update before the SCTLR_EL1.TCF0 one. + */ + preempt_disable(); + current->thread.sctlr_tcf0 = tcf0; + update_sctlr_el1_tcf0(tcf0); + preempt_enable(); +} + +static void update_gcr_el1_excl(u64 incl) +{ + u64 excl = ~incl & SYS_GCR_EL1_EXCL_MASK; + + /* + * Note that 'incl' is an include mask (controlled by the user via + * prctl()) while GCR_EL1 accepts an exclude mask. + * No need for ISB since this only affects EL0 currently, implicit + * with ERET. + */ + sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl); +} + +static void set_gcr_el1_excl(u64 incl) +{ + current->thread.gcr_user_incl = incl; + update_gcr_el1_excl(incl); +} + +void flush_mte_state(void) +{ + if (!system_supports_mte()) + return; + + /* clear any pending asynchronous tag fault */ + dsb(ish); + write_sysreg_s(0, SYS_TFSRE0_EL1); + clear_thread_flag(TIF_MTE_ASYNC_FAULT); + /* disable tag checking */ + set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE); + /* reset tag generation mask */ + set_gcr_el1_excl(0); +} + +void mte_thread_switch(struct task_struct *next) +{ + if (!system_supports_mte()) + return; + + /* avoid expensive SCTLR_EL1 accesses if no change */ + if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0) + update_sctlr_el1_tcf0(next->thread.sctlr_tcf0); + update_gcr_el1_excl(next->thread.gcr_user_incl); +} + +void mte_suspend_exit(void) +{ + if (!system_supports_mte()) + return; + + update_gcr_el1_excl(current->thread.gcr_user_incl); +} + +long set_mte_ctrl(struct task_struct *task, unsigned long arg) +{ + u64 tcf0; + u64 gcr_incl = (arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT; + + if (!system_supports_mte()) + return 0; + + switch (arg & PR_MTE_TCF_MASK) { + case PR_MTE_TCF_NONE: + tcf0 = SCTLR_EL1_TCF0_NONE; + break; + case PR_MTE_TCF_SYNC: + tcf0 = SCTLR_EL1_TCF0_SYNC; + break; + case PR_MTE_TCF_ASYNC: + tcf0 = SCTLR_EL1_TCF0_ASYNC; + break; + default: + return -EINVAL; + } + + if (task != current) { + task->thread.sctlr_tcf0 = tcf0; + task->thread.gcr_user_incl = gcr_incl; + } else { + set_sctlr_el1_tcf0(tcf0); + set_gcr_el1_excl(gcr_incl); + } + + return 0; +} + +long get_mte_ctrl(struct task_struct *task) +{ + unsigned long ret; + + if (!system_supports_mte()) + return 0; + + ret = task->thread.gcr_user_incl << PR_MTE_TAG_SHIFT; + + switch (task->thread.sctlr_tcf0) { + case SCTLR_EL1_TCF0_NONE: + return PR_MTE_TCF_NONE; + case SCTLR_EL1_TCF0_SYNC: + ret |= PR_MTE_TCF_SYNC; + break; + case SCTLR_EL1_TCF0_ASYNC: + ret |= PR_MTE_TCF_ASYNC; + break; + } + + return ret; +} + +/* + * Access MTE tags in another process' address space as given in mm. Update + * the number of tags copied. Return 0 if any tags copied, error otherwise. + * Inspired by __access_remote_vm(). + */ +static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, + struct iovec *kiov, unsigned int gup_flags) +{ + struct vm_area_struct *vma; + void __user *buf = kiov->iov_base; + size_t len = kiov->iov_len; + int ret; + int write = gup_flags & FOLL_WRITE; + + if (!access_ok(buf, len)) + return -EFAULT; + + if (mmap_read_lock_killable(mm)) + return -EIO; + + while (len) { + unsigned long tags, offset; + void *maddr; + struct page *page = NULL; + + ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page, + &vma, NULL); + if (ret <= 0) + break; + + /* + * Only copy tags if the page has been mapped as PROT_MTE + * (PG_mte_tagged set). Otherwise the tags are not valid and + * not accessible to user. Moreover, an mprotect(PROT_MTE) + * would cause the existing tags to be cleared if the page + * was never mapped with PROT_MTE. + */ + if (!test_bit(PG_mte_tagged, &page->flags)) { + ret = -EOPNOTSUPP; + put_page(page); + break; + } + + /* limit access to the end of the page */ + offset = offset_in_page(addr); + tags = min(len, (PAGE_SIZE - offset) / MTE_GRANULE_SIZE); + + maddr = page_address(page); + if (write) { + tags = mte_copy_tags_from_user(maddr + offset, buf, tags); + set_page_dirty_lock(page); + } else { + tags = mte_copy_tags_to_user(buf, maddr + offset, tags); + } + put_page(page); + + /* error accessing the tracer's buffer */ + if (!tags) + break; + + len -= tags; + buf += tags; + addr += tags * MTE_GRANULE_SIZE; + } + mmap_read_unlock(mm); + + /* return an error if no tags copied */ + kiov->iov_len = buf - kiov->iov_base; + if (!kiov->iov_len) { + /* check for error accessing the tracee's address space */ + if (ret <= 0) + return -EIO; + else + return -EFAULT; + } + + return 0; +} + +/* + * Copy MTE tags in another process' address space at 'addr' to/from tracer's + * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm(). + */ +static int access_remote_tags(struct task_struct *tsk, unsigned long addr, + struct iovec *kiov, unsigned int gup_flags) +{ + struct mm_struct *mm; + int ret; + + mm = get_task_mm(tsk); + if (!mm) + return -EPERM; + + if (!tsk->ptrace || (current != tsk->parent) || + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptracer_capable(tsk, mm->user_ns))) { + mmput(mm); + return -EPERM; + } + + ret = __access_remote_tags(mm, addr, kiov, gup_flags); + mmput(mm); + + return ret; +} + +int mte_ptrace_copy_tags(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret; + struct iovec kiov; + struct iovec __user *uiov = (void __user *)data; + unsigned int gup_flags = FOLL_FORCE; + + if (!system_supports_mte()) + return -EIO; + + if (get_user(kiov.iov_base, &uiov->iov_base) || + get_user(kiov.iov_len, &uiov->iov_len)) + return -EFAULT; + + if (request == PTRACE_POKEMTETAGS) + gup_flags |= FOLL_WRITE; + + /* align addr to the MTE tag granule */ + addr &= MTE_GRANULE_MASK; + + ret = access_remote_tags(child, addr, &kiov, gup_flags); + if (!ret) + ret = put_user(kiov.iov_len, &uiov->iov_len); + + return ret; +} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 085d8ca39e47..4784011cecac 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -53,6 +53,7 @@ #include <asm/exec.h> #include <asm/fpsimd.h> #include <asm/mmu_context.h> +#include <asm/mte.h> #include <asm/processor.h> #include <asm/pointer_auth.h> #include <asm/stacktrace.h> @@ -240,7 +241,7 @@ static void print_pstate(struct pt_regs *regs) const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >> PSR_BTYPE_SHIFT]; - printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO BTYPE=%s)\n", + printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO BTYPE=%s)\n", pstate, pstate & PSR_N_BIT ? 'N' : 'n', pstate & PSR_Z_BIT ? 'Z' : 'z', @@ -252,6 +253,7 @@ static void print_pstate(struct pt_regs *regs) pstate & PSR_F_BIT ? 'F' : 'f', pstate & PSR_PAN_BIT ? '+' : '-', pstate & PSR_UAO_BIT ? '+' : '-', + pstate & PSR_TCO_BIT ? '+' : '-', btype_str); } } @@ -337,6 +339,7 @@ void flush_thread(void) tls_thread_flush(); flush_ptrace_hw_breakpoint(current); flush_tagged_addr_state(); + flush_mte_state(); } void release_thread(struct task_struct *dead_task) @@ -369,6 +372,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); + /* clear any pending asynchronous tag fault raised by the parent */ + clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); + return 0; } @@ -561,6 +567,13 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, */ dsb(ish); + /* + * MTE thread switching must happen after the DSB above to ensure that + * any asynchronous tag check faults have been logged in the TFSR*_EL1 + * registers. + */ + mte_thread_switch(next); + /* the actual thread switch */ last = cpu_switch_to(prev, next); @@ -623,11 +636,18 @@ void arch_setup_new_exec(void) */ static unsigned int tagged_addr_disabled; -long set_tagged_addr_ctrl(unsigned long arg) +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) { - if (is_compat_task()) + unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE; + struct thread_info *ti = task_thread_info(task); + + if (is_compat_thread(ti)) return -EINVAL; - if (arg & ~PR_TAGGED_ADDR_ENABLE) + + if (system_supports_mte()) + valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK; + + if (arg & ~valid_mask) return -EINVAL; /* @@ -637,20 +657,28 @@ long set_tagged_addr_ctrl(unsigned long arg) if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled) return -EINVAL; - update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE); + if (set_mte_ctrl(task, arg) != 0) + return -EINVAL; + + update_ti_thread_flag(ti, TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE); return 0; } -long get_tagged_addr_ctrl(void) +long get_tagged_addr_ctrl(struct task_struct *task) { - if (is_compat_task()) + long ret = 0; + struct thread_info *ti = task_thread_info(task); + + if (is_compat_thread(ti)) return -EINVAL; - if (test_thread_flag(TIF_TAGGED_ADDR)) - return PR_TAGGED_ADDR_ENABLE; + if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR)) + ret = PR_TAGGED_ADDR_ENABLE; - return 0; + ret |= get_mte_ctrl(task); + + return ret; } /* diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index d8ebfd813e28..f49b349e16a3 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -34,6 +34,7 @@ #include <asm/cpufeature.h> #include <asm/debug-monitors.h> #include <asm/fpsimd.h> +#include <asm/mte.h> #include <asm/pointer_auth.h> #include <asm/stacktrace.h> #include <asm/syscall.h> @@ -1032,6 +1033,35 @@ static int pac_generic_keys_set(struct task_struct *target, #endif /* CONFIG_CHECKPOINT_RESTORE */ #endif /* CONFIG_ARM64_PTR_AUTH */ +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI +static int tagged_addr_ctrl_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + long ctrl = get_tagged_addr_ctrl(target); + + if (IS_ERR_VALUE(ctrl)) + return ctrl; + + return membuf_write(&to, &ctrl, sizeof(ctrl)); +} + +static int tagged_addr_ctrl_set(struct task_struct *target, const struct + user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, const + void __user *ubuf) +{ + int ret; + long ctrl; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); + if (ret) + return ret; + + return set_tagged_addr_ctrl(target, ctrl); +} +#endif + enum aarch64_regset { REGSET_GPR, REGSET_FPR, @@ -1051,6 +1081,9 @@ enum aarch64_regset { REGSET_PACG_KEYS, #endif #endif +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI + REGSET_TAGGED_ADDR_CTRL, +#endif }; static const struct user_regset aarch64_regsets[] = { @@ -1148,6 +1181,16 @@ static const struct user_regset aarch64_regsets[] = { }, #endif #endif +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI + [REGSET_TAGGED_ADDR_CTRL] = { + .core_note_type = NT_ARM_TAGGED_ADDR_CTRL, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = tagged_addr_ctrl_get, + .set = tagged_addr_ctrl_set, + }, +#endif }; static const struct user_regset_view user_aarch64_view = { @@ -1691,6 +1734,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { + switch (request) { + case PTRACE_PEEKMTETAGS: + case PTRACE_POKEMTETAGS: + return mte_ptrace_copy_tags(child, request, addr, data); + } + return ptrace_request(child, request, addr, data); } @@ -1793,7 +1842,7 @@ void syscall_trace_exit(struct pt_regs *regs) * We also reserve IL for the kernel; SS is handled dynamically. */ #define SPSR_EL1_AARCH64_RES0_BITS \ - (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ + (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 26) | GENMASK_ULL(23, 22) | \ GENMASK_ULL(20, 13) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ (GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20)) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index f3af68dc1cf8..bdcaaf091e1e 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -749,6 +749,9 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, regs->pstate |= PSR_BTYPE_C; } + /* TCO (Tag Check Override) always cleared for signal handlers */ + regs->pstate &= ~PSR_TCO_BIT; + if (ka->sa.sa_flags & SA_RESTORER) sigtramp = ka->sa.sa_restorer; else @@ -933,6 +936,12 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_flags & _TIF_UPROBE) uprobe_notify_resume(regs); + if (thread_flags & _TIF_MTE_ASYNC_FAULT) { + clear_thread_flag(TIF_MTE_ASYNC_FAULT); + send_sig_fault(SIGSEGV, SEGV_MTEAERR, + (void __user *)NULL, current); + } + if (thread_flags & _TIF_SIGPENDING) do_signal(regs); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 584c14ce3c86..96cd347c7a46 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -10,6 +10,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/exec.h> +#include <asm/mte.h> #include <asm/memory.h> #include <asm/mmu_context.h> #include <asm/smp_plat.h> @@ -73,6 +74,9 @@ void notrace __cpu_suspend_exit(void) * disabled it, make sure their wishes are obeyed. */ spectre_v4_enable_mitigation(NULL); + + /* Restore additional MTE-specific configuration */ + mte_suspend_exit(); } /* diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 5f0c04863d2c..e4c0dadf0d92 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -123,6 +123,16 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, local_daif_restore(DAIF_PROCCTX); user_exit(); + if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { + /* + * Process the asynchronous tag check fault before the actual + * syscall. do_notify_resume() will send a signal to userspace + * before the syscall is restarted. + */ + regs->regs[0] = -ERESTARTNOINTR; + return; + } + if (has_syscall_work(flags)) { /* * The de-facto standard way to skip a system call using ptrace diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7b8a8f6169d0..9ca270603980 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1134,6 +1134,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, if (!(val & (0xfUL << ID_AA64PFR0_CSV2_SHIFT)) && arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) val |= (1UL << ID_AA64PFR0_CSV2_SHIFT); + } else if (id == SYS_ID_AA64PFR1_EL1) { + val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT); } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) { val &= ~((0xfUL << ID_AA64ISAR1_APA_SHIFT) | (0xfUL << ID_AA64ISAR1_API_SHIFT) | @@ -1385,6 +1387,13 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } +static bool access_mte_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + kvm_inject_undefined(vcpu); + return false; +} + /* sys_reg_desc initialiser for known cpufeature ID registers */ #define ID_SANITISED(name) { \ SYS_DESC(SYS_##name), \ @@ -1550,6 +1559,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, + + { SYS_DESC(SYS_RGSR_EL1), access_mte_regs }, + { SYS_DESC(SYS_GCR_EL1), access_mte_regs }, + { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, @@ -1574,6 +1587,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi }, { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi }, + { SYS_DESC(SYS_TFSR_EL1), access_mte_regs }, + { SYS_DESC(SYS_TFSRE0_EL1), access_mte_regs }, + { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 }, { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 2fc253466dbf..d31e1169d9b8 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -16,3 +16,5 @@ lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o obj-$(CONFIG_CRC32) += crc32.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o + +obj-$(CONFIG_ARM64_MTE) += mte.o diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S new file mode 100644 index 000000000000..03ca6d8b8670 --- /dev/null +++ b/arch/arm64/lib/mte.S @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 ARM Ltd. + */ +#include <linux/linkage.h> + +#include <asm/alternative.h> +#include <asm/assembler.h> +#include <asm/mte.h> +#include <asm/page.h> +#include <asm/sysreg.h> + + .arch armv8.5-a+memtag + +/* + * multitag_transfer_size - set \reg to the block size that is accessed by the + * LDGM/STGM instructions. + */ + .macro multitag_transfer_size, reg, tmp + mrs_s \reg, SYS_GMID_EL1 + ubfx \reg, \reg, #SYS_GMID_EL1_BS_SHIFT, #SYS_GMID_EL1_BS_SIZE + mov \tmp, #4 + lsl \reg, \tmp, \reg + .endm + +/* + * Clear the tags in a page + * x0 - address of the page to be cleared + */ +SYM_FUNC_START(mte_clear_page_tags) + multitag_transfer_size x1, x2 +1: stgm xzr, [x0] + add x0, x0, x1 + tst x0, #(PAGE_SIZE - 1) + b.ne 1b + ret +SYM_FUNC_END(mte_clear_page_tags) + +/* + * Copy the tags from the source page to the destination one + * x0 - address of the destination page + * x1 - address of the source page + */ +SYM_FUNC_START(mte_copy_page_tags) + mov x2, x0 + mov x3, x1 + multitag_transfer_size x5, x6 +1: ldgm x4, [x3] + stgm x4, [x2] + add x2, x2, x5 + add x3, x3, x5 + tst x2, #(PAGE_SIZE - 1) + b.ne 1b + ret +SYM_FUNC_END(mte_copy_page_tags) + +/* + * Read tags from a user buffer (one tag per byte) and set the corresponding + * tags at the given kernel address. Used by PTRACE_POKEMTETAGS. + * x0 - kernel address (to) + * x1 - user buffer (from) + * x2 - number of tags/bytes (n) + * Returns: + * x0 - number of tags read/set + */ +SYM_FUNC_START(mte_copy_tags_from_user) + mov x3, x1 + cbz x2, 2f +1: + uao_user_alternative 2f, ldrb, ldtrb, w4, x1, 0 + lsl x4, x4, #MTE_TAG_SHIFT + stg x4, [x0], #MTE_GRANULE_SIZE + add x1, x1, #1 + subs x2, x2, #1 + b.ne 1b + + // exception handling and function return +2: sub x0, x1, x3 // update the number of tags set + ret +SYM_FUNC_END(mte_copy_tags_from_user) + +/* + * Get the tags from a kernel address range and write the tag values to the + * given user buffer (one tag per byte). Used by PTRACE_PEEKMTETAGS. + * x0 - user buffer (to) + * x1 - kernel address (from) + * x2 - number of tags/bytes (n) + * Returns: + * x0 - number of tags read/set + */ +SYM_FUNC_START(mte_copy_tags_to_user) + mov x3, x0 + cbz x2, 2f +1: + ldg x4, [x1] + ubfx x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE + uao_user_alternative 2f, strb, sttrb, w4, x0, 0 + add x0, x0, #1 + add x1, x1, #MTE_GRANULE_SIZE + subs x2, x2, #1 + b.ne 1b + + // exception handling and function return +2: sub x0, x0, x3 // update the number of tags copied + ret +SYM_FUNC_END(mte_copy_tags_to_user) + +/* + * Save the tags in a page + * x0 - page address + * x1 - tag storage + */ +SYM_FUNC_START(mte_save_page_tags) + multitag_transfer_size x7, x5 +1: + mov x2, #0 +2: + ldgm x5, [x0] + orr x2, x2, x5 + add x0, x0, x7 + tst x0, #0xFF // 16 tag values fit in a register, + b.ne 2b // which is 16*16=256 bytes + + str x2, [x1], #8 + + tst x0, #(PAGE_SIZE - 1) + b.ne 1b + + ret +SYM_FUNC_END(mte_save_page_tags) + +/* + * Restore the tags in a page + * x0 - page address + * x1 - tag storage + */ +SYM_FUNC_START(mte_restore_page_tags) + multitag_transfer_size x7, x5 +1: + ldr x2, [x1], #8 +2: + stgm x2, [x0] + add x0, x0, x7 + tst x0, #0xFF + b.ne 2b + + tst x0, #(PAGE_SIZE - 1) + b.ne 1b + + ret +SYM_FUNC_END(mte_restore_page_tags) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 2a1d275cd4d7..5ead3c3de3b6 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o +obj-$(CONFIG_ARM64_MTE) += mteswap.o KASAN_SANITIZE_physaddr.o += n obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 2ee7b73433a5..70a71f38b6a9 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -6,21 +6,32 @@ * Copyright (C) 2012 ARM Ltd. */ +#include <linux/bitops.h> #include <linux/mm.h> #include <asm/page.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> +#include <asm/mte.h> -void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) +void copy_highpage(struct page *to, struct page *from) { - struct page *page = virt_to_page(kto); + struct page *kto = page_address(to); + struct page *kfrom = page_address(from); + copy_page(kto, kfrom); - flush_dcache_page(page); + + if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) { + set_bit(PG_mte_tagged, &to->flags); + mte_copy_page_tags(kto, kfrom); + } } -EXPORT_SYMBOL_GPL(__cpu_copy_user_page); +EXPORT_SYMBOL(copy_highpage); -void __cpu_clear_user_page(void *kaddr, unsigned long vaddr) +void copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) { - clear_page(kaddr); + copy_highpage(to, from); + flush_dcache_page(to); } -EXPORT_SYMBOL_GPL(__cpu_clear_user_page); +EXPORT_SYMBOL_GPL(copy_user_highpage); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index a696a7921da4..94c99c1c19e3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -643,6 +643,13 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 0; } +static int do_tag_check_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + do_bad_area(addr, esr, regs); + return 0; +} + static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "ttbr address size fault" }, { do_bad, SIGKILL, SI_KERNEL, "level 1 address size fault" }, @@ -661,7 +668,7 @@ static const struct fault_info fault_info[] = { { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_sea, SIGBUS, BUS_OBJERR, "synchronous external abort" }, - { do_bad, SIGKILL, SI_KERNEL, "unknown 17" }, + { do_tag_check_fault, SIGSEGV, SEGV_MTESERR, "synchronous tag check fault" }, { do_bad, SIGKILL, SI_KERNEL, "unknown 18" }, { do_bad, SIGKILL, SI_KERNEL, "unknown 19" }, { do_sea, SIGKILL, SI_KERNEL, "level 0 (translation table walk)" }, diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 75df62fea1b6..936c4762dadf 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -122,7 +122,7 @@ static bool pgattr_change_is_safe(u64 old, u64 new) * The following mapping attributes may be updated in live * kernel mappings without the need for break-before-make. */ - static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG; + pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG; /* creating or taking down mappings is always safe */ if (old == 0 || new == 0) @@ -136,6 +136,17 @@ static bool pgattr_change_is_safe(u64 old, u64 new) if (old & ~new & PTE_NG) return false; + /* + * Changing the memory type between Normal and Normal-Tagged is safe + * since Tagged is considered a permission attribute from the + * mismatched attribute aliases perspective. + */ + if (((old & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL) || + (old & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_TAGGED)) && + ((new & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL) || + (new & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_TAGGED))) + mask |= PTE_ATTRINDX_MASK; + return ((old ^ new) & ~mask) == 0; } @@ -491,7 +502,12 @@ static void __init map_mem(pgd_t *pgdp) if (memblock_is_nomap(reg)) continue; - __map_memblock(pgdp, start, end, PAGE_KERNEL, flags); + /* + * The linear map must allow allocation tags reading/writing + * if MTE is present. Otherwise, it has the same attributes as + * PAGE_KERNEL. + */ + __map_memblock(pgdp, start, end, PAGE_KERNEL_TAGGED, flags); } /* diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c new file mode 100644 index 000000000000..c52c1847079c --- /dev/null +++ b/arch/arm64/mm/mteswap.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/pagemap.h> +#include <linux/xarray.h> +#include <linux/slab.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <asm/mte.h> + +static DEFINE_XARRAY(mte_pages); + +void *mte_allocate_tag_storage(void) +{ + /* tags granule is 16 bytes, 2 tags stored per byte */ + return kmalloc(PAGE_SIZE / 16 / 2, GFP_KERNEL); +} + +void mte_free_tag_storage(char *storage) +{ + kfree(storage); +} + +int mte_save_tags(struct page *page) +{ + void *tag_storage, *ret; + + if (!test_bit(PG_mte_tagged, &page->flags)) + return 0; + + tag_storage = mte_allocate_tag_storage(); + if (!tag_storage) + return -ENOMEM; + + mte_save_page_tags(page_address(page), tag_storage); + + /* page_private contains the swap entry.val set in do_swap_page */ + ret = xa_store(&mte_pages, page_private(page), tag_storage, GFP_KERNEL); + if (WARN(xa_is_err(ret), "Failed to store MTE tags")) { + mte_free_tag_storage(tag_storage); + return xa_err(ret); + } else if (ret) { + /* Entry is being replaced, free the old entry */ + mte_free_tag_storage(ret); + } + + return 0; +} + +bool mte_restore_tags(swp_entry_t entry, struct page *page) +{ + void *tags = xa_load(&mte_pages, entry.val); + + if (!tags) + return false; + + mte_restore_page_tags(page_address(page), tags); + + return true; +} + +void mte_invalidate_tags(int type, pgoff_t offset) +{ + swp_entry_t entry = swp_entry(type, offset); + void *tags = xa_erase(&mte_pages, entry.val); + + mte_free_tag_storage(tags); +} + +void mte_invalidate_tags_area(int type) +{ + swp_entry_t entry = swp_entry(type, 0); + swp_entry_t last_entry = swp_entry(type + 1, 0); + void *tags; + + XA_STATE(xa_state, &mte_pages, entry.val); + + xa_lock(&mte_pages); + xas_for_each(&xa_state, tags, last_entry.val - 1) { + __xa_erase(&mte_pages, xa_state.xa_index); + mte_free_tag_storage(tags); + } + xa_unlock(&mte_pages); +} diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 796e47a571e6..23c326a06b2d 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -18,6 +18,7 @@ #include <asm/cpufeature.h> #include <asm/alternative.h> #include <asm/smp.h> +#include <asm/sysreg.h> #ifdef CONFIG_ARM64_64K_PAGES #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K @@ -44,14 +45,18 @@ #define TCR_KASAN_FLAGS 0 #endif -/* Default MAIR_EL1 */ +/* + * Default MAIR_EL1. MT_NORMAL_TAGGED is initially mapped as Normal memory and + * changed during __cpu_setup to Normal Tagged if the system supports MTE. + */ #define MAIR_EL1_SET \ (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \ MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ - MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL_TAGGED)) #ifdef CONFIG_CPU_PM /** @@ -421,6 +426,29 @@ SYM_FUNC_START(__cpu_setup) * Memory region attributes */ mov_q x5, MAIR_EL1_SET +#ifdef CONFIG_ARM64_MTE + /* + * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported + * (ID_AA64PFR1_EL1[11:8] > 1). + */ + mrs x10, ID_AA64PFR1_EL1 + ubfx x10, x10, #ID_AA64PFR1_MTE_SHIFT, #4 + cmp x10, #ID_AA64PFR1_MTE + b.lt 1f + + /* Normal Tagged memory type at the corresponding MAIR index */ + mov x10, #MAIR_ATTR_NORMAL_TAGGED + bfi x5, x10, #(8 * MT_NORMAL_TAGGED), #8 + + /* initialize GCR_EL1: all non-zero tags excluded by default */ + mov x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK) + msr_s SYS_GCR_EL1, x10 + + /* clear any pending tag check faults in TFSR*_EL1 */ + msr_s SYS_TFSR_EL1, xzr + msr_s SYS_TFSRE0_EL1, xzr +1: +#endif msr mair_el1, x5 /* * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 265284dc942d..807dc634bbd2 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -171,6 +171,10 @@ static const struct prot_bits pte_bits[] = { .mask = PTE_ATTRINDX_MASK, .val = PTE_ATTRINDX(MT_NORMAL), .set = "MEM/NORMAL", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL_TAGGED), + .set = "MEM/NORMAL-TAGGED", } }; diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 9ccbf0576cd0..a7f3e12cfbdb 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -27,7 +27,7 @@ static inline void signal_compat_build_tests(void) */ BUILD_BUG_ON(NSIGILL != 11); BUILD_BUG_ON(NSIGFPE != 15); - BUILD_BUG_ON(NSIGSEGV != 7); + BUILD_BUG_ON(NSIGSEGV != 9); BUILD_BUG_ON(NSIGBUS != 5); BUILD_BUG_ON(NSIGTRAP != 5); BUILD_BUG_ON(NSIGCHLD != 6); diff --git a/fs/namespace.c b/fs/namespace.c index bae0e95b3713..32a0b9146757 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3075,7 +3075,7 @@ static void shrink_submounts(struct mount *mnt) void *copy_mount_options(const void __user * data) { char *copy; - unsigned size; + unsigned left, offset; if (!data) return NULL; @@ -3084,16 +3084,27 @@ void *copy_mount_options(const void __user * data) if (!copy) return ERR_PTR(-ENOMEM); - size = PAGE_SIZE - offset_in_page(data); + left = copy_from_user(copy, data, PAGE_SIZE); - if (copy_from_user(copy, data, size)) { + /* + * Not all architectures have an exact copy_from_user(). Resort to + * byte at a time. + */ + offset = PAGE_SIZE - left; + while (left) { + char c; + if (get_user(c, (const char __user *)data + offset)) + break; + copy[offset] = c; + left--; + offset++; + } + + if (left == PAGE_SIZE) { kfree(copy); return ERR_PTR(-EFAULT); } - if (size != PAGE_SIZE) { - if (copy_from_user(copy + size, data + size, PAGE_SIZE - size)) - memset(copy + size, 0, PAGE_SIZE - size); - } + return copy; } diff --git a/fs/proc/page.c b/fs/proc/page.c index f909243d4a66..9f1077d94cde 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -217,6 +217,9 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); +#ifdef CONFIG_64BIT + u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2); +#endif return u; }; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 5066b0251ed8..35172a91148e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -653,6 +653,10 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_MERGEABLE)] = "mg", [ilog2(VM_UFFD_MISSING)]= "um", [ilog2(VM_UFFD_WP)] = "uw", +#ifdef CONFIG_ARM64_MTE + [ilog2(VM_MTE)] = "mt", + [ilog2(VM_MTE_ALLOWED)] = "", +#endif #ifdef CONFIG_ARCH_HAS_PKEYS /* These come out via ProtectionKey: */ [ilog2(VM_PKEY_BIT0)] = "", diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h index abd20ef93c98..eee1877a354e 100644 --- a/include/linux/kernel-page-flags.h +++ b/include/linux/kernel-page-flags.h @@ -17,5 +17,6 @@ #define KPF_ARCH 38 #define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 +#define KPF_ARCH_2 41 #endif /* LINUX_KERNEL_PAGE_FLAGS_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index ca6e6a81576b..4312c6c808e9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -340,6 +340,14 @@ extern unsigned int kobjsize(const void *objp); # define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ #endif +#if defined(CONFIG_ARM64_MTE) +# define VM_MTE VM_HIGH_ARCH_0 /* Use Tagged memory for access control */ +# define VM_MTE_ALLOWED VM_HIGH_ARCH_1 /* Tagged memory permitted */ +#else +# define VM_MTE VM_NONE +# define VM_MTE_ALLOWED VM_NONE +#endif + #ifndef VM_GROWSUP # define VM_GROWSUP VM_NONE #endif diff --git a/include/linux/mman.h b/include/linux/mman.h index 6f34c33075f9..629cefc4ecba 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -78,13 +78,18 @@ static inline void vm_unacct_memory(long pages) } /* - * Allow architectures to handle additional protection bits + * Allow architectures to handle additional protection and flag bits. The + * overriding macros must be defined in the arch-specific asm/mman.h file. */ #ifndef arch_calc_vm_prot_bits #define arch_calc_vm_prot_bits(prot, pkey) 0 #endif +#ifndef arch_calc_vm_flag_bits +#define arch_calc_vm_flag_bits(flags) 0 +#endif + #ifndef arch_vm_get_page_prot #define arch_vm_get_page_prot(vm_flags) __pgprot(0) #endif @@ -103,6 +108,19 @@ static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) #define arch_validate_prot arch_validate_prot #endif +#ifndef arch_validate_flags +/* + * This is called from mmap() and mprotect() with the updated vma->vm_flags. + * + * Returns true if the VM_* flags are valid. + */ +static inline bool arch_validate_flags(unsigned long flags) +{ + return true; +} +#define arch_validate_flags arch_validate_flags +#endif + /* * Optimisation macro. It is equivalent to: * (x & bit1) ? bit2 : 0 @@ -135,7 +153,8 @@ calc_vm_flag_bits(unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | - _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ); + _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | + arch_calc_vm_flag_bits(flags); } unsigned long vm_commit_limit(void); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6be1aa559b1e..276140c94f4a 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -136,6 +136,9 @@ enum pageflags { PG_young, PG_idle, #endif +#ifdef CONFIG_64BIT + PG_arch_2, +#endif __NR_PAGEFLAGS, /* Filesystems */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e8cbc2e795d5..dc3b74129fbc 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -633,6 +633,34 @@ static inline int arch_unmap_one(struct mm_struct *mm, } #endif +/* + * Allow architectures to preserve additional metadata associated with + * swapped-out pages. The corresponding __HAVE_ARCH_SWAP_* macros and function + * prototypes must be defined in the arch-specific asm/pgtable.h file. + */ +#ifndef __HAVE_ARCH_PREPARE_TO_SWAP +static inline int arch_prepare_to_swap(struct page *page) +{ + return 0; +} +#endif + +#ifndef __HAVE_ARCH_SWAP_INVALIDATE +static inline void arch_swap_invalidate_page(int type, pgoff_t offset) +{ +} + +static inline void arch_swap_invalidate_area(int type) +{ +} +#endif + +#ifndef __HAVE_ARCH_SWAP_RESTORE +static inline void arch_swap_restore(swp_entry_t entry, struct page *page) +{ +} +#endif + #ifndef __HAVE_ARCH_PGD_OFFSET_GATE #define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) #endif diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 5fb752034386..67018d367b9f 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -79,6 +79,12 @@ #define IF_HAVE_PG_IDLE(flag,string) #endif +#ifdef CONFIG_64BIT +#define IF_HAVE_PG_ARCH_2(flag,string) ,{1UL << flag, string} +#else +#define IF_HAVE_PG_ARCH_2(flag,string) +#endif + #define __def_pageflag_names \ {1UL << PG_locked, "locked" }, \ {1UL << PG_waiters, "waiters" }, \ @@ -105,7 +111,8 @@ IF_HAVE_PG_MLOCK(PG_mlocked, "mlocked" ) \ IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \ IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ IF_HAVE_PG_IDLE(PG_young, "young" ) \ -IF_HAVE_PG_IDLE(PG_idle, "idle" ) +IF_HAVE_PG_IDLE(PG_idle, "idle" ) \ +IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) #define show_page_flags(flags) \ (flags) ? __print_flags(flags, "|", \ diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index cb3d6c267181..7aacf9389010 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -229,7 +229,9 @@ typedef struct siginfo { #define SEGV_ACCADI 5 /* ADI not enabled for mapped object */ #define SEGV_ADIDERR 6 /* Disrupting MCD error */ #define SEGV_ADIPERR 7 /* Precise MCD exception */ -#define NSIGSEGV 7 +#define SEGV_MTEAERR 8 /* Asynchronous ARM MTE error */ +#define SEGV_MTESERR 9 /* Synchronous ARM MTE exception */ +#define NSIGSEGV 9 /* * SIGBUS si_codes diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 22220945a5fd..30f68b42eeb5 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -425,6 +425,7 @@ typedef struct elf64_shdr { #define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */ #define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */ #define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ +#define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 07b4f8131e36..7f0827705c9a 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -233,6 +233,15 @@ struct prctl_mm_map { #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) +/* MTE tag check fault modes */ +# define PR_MTE_TCF_SHIFT 1 +# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +/* MTE tag inclusion mask */ +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2ccff8472cd4..1a5773c95f53 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2337,6 +2337,9 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_workingset) | (1L << PG_locked) | (1L << PG_unevictable) | +#ifdef CONFIG_64BIT + (1L << PG_arch_2) | +#endif (1L << PG_dirty))); /* ->mapping in first tail page is compound_mapcount */ diff --git a/mm/mmap.c b/mm/mmap.c index 40248d84ad5f..eed30b096667 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1812,6 +1812,15 @@ unsigned long mmap_region(struct file *file, unsigned long addr, vma_set_anonymous(vma); } + /* Allow architectures to sanity-check the vm_flags */ + if (!arch_validate_flags(vma->vm_flags)) { + error = -EINVAL; + if (file) + goto unmap_and_free_vma; + else + goto free_vma; + } + vma_link(mm, vma, prev, rb_link, rb_parent); /* Once vma denies write, undo our temporary denial count */ if (file) { diff --git a/mm/mprotect.c b/mm/mprotect.c index ce8b8a5eacbb..56c02beb6041 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -603,6 +603,12 @@ static int do_mprotect_pkey(unsigned long start, size_t len, goto out; } + /* Allow architectures to sanity-check the new flags */ + if (!arch_validate_flags(newflags)) { + error = -EINVAL; + goto out; + } + error = security_file_mprotect(vma, reqprot, prot); if (error) goto out; diff --git a/mm/page_io.c b/mm/page_io.c index e485a6e8a6cd..4ca28aad0d94 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -252,6 +252,16 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); goto out; } + /* + * Arch code may have to preserve more data than just the page + * contents, e.g. memory tags. + */ + ret = arch_prepare_to_swap(page); + if (ret) { + set_page_dirty(page); + unlock_page(page); + goto out; + } if (frontswap_store(page) == 0) { set_page_writeback(page); unlock_page(page); diff --git a/mm/shmem.c b/mm/shmem.c index 271548ca20f3..e57d3314dc4b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1734,6 +1734,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, } wait_on_page_writeback(page); + /* + * Some architectures may have to restore extra metadata to the + * physical page after reading from swap. + */ + arch_swap_restore(swap, page); + if (shmem_should_replace_page(page, gfp)) { error = shmem_replace_page(&page, gfp, info, index); if (error) @@ -2267,6 +2273,9 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags &= ~(VM_MAYWRITE); } + /* arm64 - allow memory tagging on RAM-based files */ + vma->vm_flags |= VM_MTE_ALLOWED; + file_accessed(file); vma->vm_ops = &shmem_vm_ops; if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && diff --git a/mm/swapfile.c b/mm/swapfile.c index 12f59e641b5e..4b1d1a04e327 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -717,6 +717,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, else swap_slot_free_notify = NULL; while (offset <= end) { + arch_swap_invalidate_page(si->type, offset); frontswap_invalidate_page(si->type, offset); if (swap_slot_free_notify) swap_slot_free_notify(si->bdev, offset); @@ -2682,6 +2683,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) frontswap_map = frontswap_map_get(p); spin_unlock(&p->lock); spin_unlock(&swap_lock); + arch_swap_invalidate_area(p->type); frontswap_invalidate_area(p->type); frontswap_map_set(p, NULL); mutex_unlock(&swapon_mutex); diff --git a/mm/util.c b/mm/util.c index 5ef378a2a038..4e21fe7eae27 100644 --- a/mm/util.c +++ b/mm/util.c @@ -957,7 +957,7 @@ out: return res; } -int memcmp_pages(struct page *page1, struct page *page2) +int __weak memcmp_pages(struct page *page1, struct page *page2) { char *addr1, *addr2; int ret; diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 58c0eab71bca..0517c744b04e 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -78,6 +78,7 @@ #define KPF_ARCH 38 #define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 +#define KPF_ARCH_2 41 /* [48-] take some arbitrary free slots for expanding overloaded flags * not part of kernel API @@ -135,6 +136,7 @@ static const char * const page_flag_names[] = { [KPF_ARCH] = "h:arch", [KPF_UNCACHED] = "c:uncached", [KPF_SOFTDIRTY] = "f:softdirty", + [KPF_ARCH_2] = "H:arch_2", [KPF_READAHEAD] = "I:readahead", [KPF_SLOB_FREE] = "P:slob_free", |