summaryrefslogtreecommitdiffstats
path: root/mm/internal.h
diff options
context:
space:
mode:
authorJeff Xu <jeffxu@chromium.org>2024-04-15 18:35:21 +0200
committerAndrew Morton <akpm@linux-foundation.org>2024-05-24 04:40:26 +0200
commit8be7258aad44b5e25977a98db136f677fa6f4370 (patch)
tree62b2b8937d4d8c22469c82e440e89d08615e3966 /mm/internal.h
parentmseal: wire up mseal syscall (diff)
downloadlinux-8be7258aad44b5e25977a98db136f677fa6f4370.tar.xz
linux-8be7258aad44b5e25977a98db136f677fa6f4370.zip
mseal: add mseal syscall
The new mseal() is an syscall on 64 bit CPU, and with following signature: int mseal(void addr, size_t len, unsigned long flags) addr/len: memory range. flags: reserved. mseal() blocks following operations for the given memory range. 1> Unmapping, moving to another location, and shrinking the size, via munmap() and mremap(), can leave an empty space, therefore can be replaced with a VMA with a new set of attributes. 2> Moving or expanding a different VMA into the current location, via mremap(). 3> Modifying a VMA via mmap(MAP_FIXED). 4> Size expansion, via mremap(), does not appear to pose any specific risks to sealed VMAs. It is included anyway because the use case is unclear. In any case, users can rely on merging to expand a sealed VMA. 5> mprotect() and pkey_mprotect(). 6> Some destructive madvice() behaviors (e.g. MADV_DONTNEED) for anonymous memory, when users don't have write permission to the memory. Those behaviors can alter region contents by discarding pages, effectively a memset(0) for anonymous memory. Following input during RFC are incooperated into this patch: Jann Horn: raising awareness and providing valuable insights on the destructive madvise operations. Linus Torvalds: assisting in defining system call signature and scope. Liam R. Howlett: perf optimization. Theo de Raadt: sharing the experiences and insight gained from implementing mimmutable() in OpenBSD. Finally, the idea that inspired this patch comes from Stephen Röttger's work in Chrome V8 CFI. [jeffxu@chromium.org: add branch prediction hint, per Pedro] Link: https://lkml.kernel.org/r/20240423192825.1273679-2-jeffxu@chromium.org Link: https://lkml.kernel.org/r/20240415163527.626541-3-jeffxu@chromium.org Signed-off-by: Jeff Xu <jeffxu@chromium.org> Reviewed-by: Kees Cook <keescook@chromium.org> Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com> Cc: Pedro Falcato <pedro.falcato@gmail.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guenter Roeck <groeck@chromium.org> Cc: Jann Horn <jannh@google.com> Cc: Jeff Xu <jeffxu@google.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Jorge Lucangeli Obes <jorgelo@chromium.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Muhammad Usama Anjum <usama.anjum@collabora.com> Cc: Pedro Falcato <pedro.falcato@gmail.com> Cc: Stephen Röttger <sroettger@google.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Amer Al Shanawany <amer.shanawany@gmail.com> Cc: Javier Carrasco <javier.carrasco.cruz@gmail.com> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/internal.h')
-rw-r--r--mm/internal.h37
1 files changed, 37 insertions, 0 deletions
diff --git a/mm/internal.h b/mm/internal.h
index 2adabe369403..b2c75b12014e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1435,6 +1435,43 @@ void __meminit __init_single_page(struct page *page, unsigned long pfn,
unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
int priority);
+#ifdef CONFIG_64BIT
+/* VM is sealed, in vm_flags */
+#define VM_SEALED _BITUL(63)
+#endif
+
+#ifdef CONFIG_64BIT
+static inline int can_do_mseal(unsigned long flags)
+{
+ if (flags)
+ return -EINVAL;
+
+ return 0;
+}
+
+bool can_modify_mm(struct mm_struct *mm, unsigned long start,
+ unsigned long end);
+bool can_modify_mm_madv(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int behavior);
+#else
+static inline int can_do_mseal(unsigned long flags)
+{
+ return -EPERM;
+}
+
+static inline bool can_modify_mm(struct mm_struct *mm, unsigned long start,
+ unsigned long end)
+{
+ return true;
+}
+
+static inline bool can_modify_mm_madv(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int behavior)
+{
+ return true;
+}
+#endif
+
#ifdef CONFIG_SHRINKER_DEBUG
static inline __printf(2, 0) int shrinker_debugfs_name_alloc(
struct shrinker *shrinker, const char *fmt, va_list ap)