diff options
Diffstat (limited to 'include')
76 files changed, 903 insertions, 184 deletions
diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h index 34785c0f57b0..5a80f8e54300 100644 --- a/include/asm-generic/checksum.h +++ b/include/asm-generic/checksum.h @@ -25,15 +25,6 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum); */ extern __wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum); -/* - * the same as csum_partial_copy, but copies from user space. - * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary - */ -extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, int *csum_err); - #ifndef csum_partial_copy_nocheck #define csum_partial_copy_nocheck(src, dst, len, sum) \ csum_partial_copy((src), (dst), (len), (sum)) diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index d1779d442aa5..66397ed10acb 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -53,6 +53,9 @@ extern char __ctors_start[], __ctors_end[]; /* Start and end of .opd section - used for function descriptors. */ extern char __start_opd[], __end_opd[]; +/* Start and end of instrumentation protected text section */ +extern char __noinstr_text_start[], __noinstr_text_end[]; + extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 71e387a5fe90..db600ef218d7 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -541,6 +541,15 @@ __end_rodata = .; /* + * Non-instrumentable text section + */ +#define NOINSTR_TEXT \ + ALIGN_FUNCTION(); \ + __noinstr_text_start = .; \ + *(.noinstr.text) \ + __noinstr_text_end = .; + +/* * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map * @@ -551,6 +560,7 @@ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ + NOINSTR_TEXT \ *(.text..refcount) \ *(.ref.text) \ MEM_KEEP(init.text*) \ diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 59494df0f55b..56d6a5c6e353 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -5,12 +5,15 @@ #ifndef __LINUX_ARM_SMCCC_H #define __LINUX_ARM_SMCCC_H +#include <linux/init.h> #include <uapi/linux/const.h> /* * This file provides common defines for ARM SMC Calling Convention as * specified in - * http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html + * https://developer.arm.com/docs/den0028/latest + * + * This code is up-to-date with version DEN 0028 C */ #define ARM_SMCCC_STD_CALL _AC(0,U) @@ -56,6 +59,7 @@ #define ARM_SMCCC_VERSION_1_0 0x10000 #define ARM_SMCCC_VERSION_1_1 0x10001 +#define ARM_SMCCC_VERSION_1_2 0x10002 #define ARM_SMCCC_VERSION_FUNC_ID \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ @@ -98,6 +102,19 @@ enum arm_smccc_conduit { enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void); /** + * arm_smccc_get_version() + * + * Returns the version to be used for SMCCCv1.1 or later. + * + * When SMCCCv1.1 or above is not present, returns SMCCCv1.0, but this + * does not imply the presence of firmware or a valid conduit. Caller + * handling SMCCCv1.0 must determine the conduit by other means. + */ +u32 arm_smccc_get_version(void); + +void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit); + +/** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 */ @@ -314,10 +331,14 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, */ #define arm_smccc_1_1_hvc(...) __arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__) -/* Return codes defined in ARM DEN 0070A */ +/* + * Return codes defined in ARM DEN 0070A + * ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C + */ #define SMCCC_RET_SUCCESS 0 #define SMCCC_RET_NOT_SUPPORTED -1 #define SMCCC_RET_NOT_REQUIRED -2 +#define SMCCC_RET_INVALID_PARAMETER -3 /* * Like arm_smccc_1_1* but always returns SMCCC_RET_NOT_SUPPORTED. diff --git a/include/linux/capability.h b/include/linux/capability.h index ecce0f43c73a..027d7e4a853b 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -251,6 +251,10 @@ extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); +static inline bool perfmon_capable(void) +{ + return capable(CAP_PERFMON) || capable(CAP_SYS_ADMIN); +} /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/compat.h b/include/linux/compat.h index 0480ba4db592..e90100c0de72 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -402,8 +402,15 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask, unsigned long bitmap_size); long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask, unsigned long bitmap_size); -int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo __user *from); -int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from); +void copy_siginfo_to_external32(struct compat_siginfo *to, + const struct kernel_siginfo *from); +int copy_siginfo_from_user32(kernel_siginfo_t *to, + const struct compat_siginfo __user *from); +int __copy_siginfo_to_user32(struct compat_siginfo __user *to, + const kernel_siginfo_t *from); +#ifndef copy_siginfo_to_user32 +#define copy_siginfo_to_user32 __copy_siginfo_to_user32 +#endif int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 333a6695a918..790c0c6b8552 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -42,3 +42,7 @@ * compilers, like ICC. */ #define barrier() __asm__ __volatile__("" : : : "memory") + +#if __has_feature(shadow_call_stack) +# define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) +#endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 448c91bf543b..6325d64e3c3b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -120,12 +120,65 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(.rodata..c_jump_table) +#ifdef CONFIG_DEBUG_ENTRY +/* Begin/end of an instrumentation safe region */ +#define instrumentation_begin() ({ \ + asm volatile("%c0:\n\t" \ + ".pushsection .discard.instr_begin\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) + +/* + * Because instrumentation_{begin,end}() can nest, objtool validation considers + * _begin() a +1 and _end() a -1 and computes a sum over the instructions. + * When the value is greater than 0, we consider instrumentation allowed. + * + * There is a problem with code like: + * + * noinstr void foo() + * { + * instrumentation_begin(); + * ... + * if (cond) { + * instrumentation_begin(); + * ... + * instrumentation_end(); + * } + * bar(); + * instrumentation_end(); + * } + * + * If instrumentation_end() would be an empty label, like all the other + * annotations, the inner _end(), which is at the end of a conditional block, + * would land on the instruction after the block. + * + * If we then consider the sum of the !cond path, we'll see that the call to + * bar() is with a 0-value, even though, we meant it to happen with a positive + * value. + * + * To avoid this, have _end() be a NOP instruction, this ensures it will be + * part of the condition block and does not escape. + */ +#define instrumentation_end() ({ \ + asm volatile("%c0: nop\n\t" \ + ".pushsection .discard.instr_end\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) +#endif /* CONFIG_DEBUG_ENTRY */ + #else #define annotate_reachable() #define annotate_unreachable() #define __annotate_jump_table #endif +#ifndef instrumentation_begin +#define instrumentation_begin() do { } while(0) +#define instrumentation_end() do { } while(0) +#endif + #ifndef ASM_UNREACHABLE # define ASM_UNREACHABLE #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index e970f97a7fcb..6fcf73200b67 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -118,6 +118,10 @@ struct ftrace_likely_data { #define notrace __attribute__((__no_instrument_function__)) #endif +/* Section for code which can't be instrumented at all */ +#define noinstr \ + noinline notrace __attribute((__section__(".noinstr.text"))) + /* * it doesn't make sense on ARM (currently the only user of __naked) * to trace naked functions because then mcount is called without @@ -193,6 +197,10 @@ struct ftrace_likely_data { # define randomized_struct_fields_end #endif +#ifndef __noscs +# define __noscs +#endif + #ifndef asm_volatile_goto #define asm_volatile_goto(x...) asm goto(x) #endif diff --git a/include/linux/configfs.h b/include/linux/configfs.h index fa9490a8874c..2e8c69b43c64 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -13,7 +13,7 @@ * * configfs Copyright (C) 2005 Oracle. All rights reserved. * - * Please read Documentation/filesystems/configfs/configfs.txt before using + * Please read Documentation/filesystems/configfs.rst before using * the configfs interface, ESPECIALLY the parts about reference counts and * item destructors. */ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index beaed2dc269e..52692587f7fe 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -144,18 +144,8 @@ static inline void get_online_cpus(void) { cpus_read_lock(); } static inline void put_online_cpus(void) { cpus_read_unlock(); } #ifdef CONFIG_PM_SLEEP_SMP -int __freeze_secondary_cpus(int primary, bool suspend); -static inline int freeze_secondary_cpus(int primary) -{ - return __freeze_secondary_cpus(primary, true); -} - -static inline int disable_nonboot_cpus(void) -{ - return __freeze_secondary_cpus(0, false); -} - -void enable_nonboot_cpus(void); +extern int freeze_secondary_cpus(int primary); +extern void thaw_secondary_cpus(void); static inline int suspend_disable_secondary_cpus(void) { @@ -168,12 +158,11 @@ static inline int suspend_disable_secondary_cpus(void) } static inline void suspend_enable_secondary_cpus(void) { - return enable_nonboot_cpus(); + return thaw_secondary_cpus(); } #else /* !CONFIG_PM_SLEEP_SMP */ -static inline int disable_nonboot_cpus(void) { return 0; } -static inline void enable_nonboot_cpus(void) {} +static inline void thaw_secondary_cpus(void) {} static inline int suspend_disable_secondary_cpus(void) { return 0; } static inline void suspend_enable_secondary_cpus(void) { } #endif /* !CONFIG_PM_SLEEP_SMP */ diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 4664fc1871de..bc156285d097 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -97,8 +97,6 @@ extern void unregister_oldmem_pfn_is_ram(void); static inline bool is_kdump_kernel(void) { return 0; } #endif /* CONFIG_CRASH_DUMP */ -extern unsigned long saved_max_pfn; - /* Device Dump information to be filled by drivers */ struct vmcoredd_data { char dump_name[VMCOREDD_MAX_NAME_BYTES]; /* Unique name of the dump */ diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index 9a72214496e5..d02f32b7514e 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -44,6 +44,9 @@ static inline int devcgroup_inode_mknod(int mode, dev_t dev) if (!S_ISBLK(mode) && !S_ISCHR(mode)) return 0; + if (S_ISCHR(mode) && dev == WHITEOUT_DEV) + return 0; + if (S_ISBLK(mode)) type = DEVCG_DEV_BLOCK; else diff --git a/include/linux/efi.h b/include/linux/efi.h index 9430d01c0c3d..2c6495f72f79 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -39,6 +39,7 @@ #define EFI_WRITE_PROTECTED ( 8 | (1UL << (BITS_PER_LONG-1))) #define EFI_OUT_OF_RESOURCES ( 9 | (1UL << (BITS_PER_LONG-1))) #define EFI_NOT_FOUND (14 | (1UL << (BITS_PER_LONG-1))) +#define EFI_TIMEOUT (18 | (1UL << (BITS_PER_LONG-1))) #define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1))) #define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1))) @@ -379,8 +380,8 @@ typedef union { typedef struct { efi_guid_t guid; - const char *name; unsigned long *ptr; + const char name[16]; } efi_config_table_type_t; #define EFI_SYSTEM_TABLE_SIGNATURE ((u64)0x5453595320494249ULL) @@ -426,6 +427,7 @@ typedef struct { u32 tables; } efi_system_table_32_t; +typedef union efi_simple_text_input_protocol efi_simple_text_input_protocol_t; typedef union efi_simple_text_output_protocol efi_simple_text_output_protocol_t; typedef union { @@ -434,7 +436,7 @@ typedef union { unsigned long fw_vendor; /* physical addr of CHAR16 vendor string */ u32 fw_revision; unsigned long con_in_handle; - unsigned long con_in; + efi_simple_text_input_protocol_t *con_in; unsigned long con_out_handle; efi_simple_text_output_protocol_t *con_out; unsigned long stderr_handle; diff --git a/include/linux/elf.h b/include/linux/elf.h index e3649b3e970e..5d5b0321da0b 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -2,6 +2,7 @@ #ifndef _LINUX_ELF_H #define _LINUX_ELF_H +#include <linux/types.h> #include <asm/elf.h> #include <uapi/linux/elf.h> @@ -21,6 +22,9 @@ SET_PERSONALITY(ex) #endif +#define ELF32_GNU_PROPERTY_ALIGN 4 +#define ELF64_GNU_PROPERTY_ALIGN 8 + #if ELF_CLASS == ELFCLASS32 extern Elf32_Dyn _DYNAMIC []; @@ -31,6 +35,7 @@ extern Elf32_Dyn _DYNAMIC []; #define elf_addr_t Elf32_Off #define Elf_Half Elf32_Half #define Elf_Word Elf32_Word +#define ELF_GNU_PROPERTY_ALIGN ELF32_GNU_PROPERTY_ALIGN #else @@ -42,6 +47,7 @@ extern Elf64_Dyn _DYNAMIC []; #define elf_addr_t Elf64_Off #define Elf_Half Elf64_Half #define Elf_Word Elf64_Word +#define ELF_GNU_PROPERTY_ALIGN ELF64_GNU_PROPERTY_ALIGN #endif @@ -56,4 +62,41 @@ static inline int elf_coredump_extra_notes_write(struct coredump_params *cprm) { extern int elf_coredump_extra_notes_size(void); extern int elf_coredump_extra_notes_write(struct coredump_params *cprm); #endif + +/* + * NT_GNU_PROPERTY_TYPE_0 header: + * Keep this internal until/unless there is an agreed UAPI definition. + * pr_type values (GNU_PROPERTY_*) are public and defined in the UAPI header. + */ +struct gnu_property { + u32 pr_type; + u32 pr_datasz; +}; + +struct arch_elf_state; + +#ifndef CONFIG_ARCH_USE_GNU_PROPERTY +static inline int arch_parse_elf_property(u32 type, const void *data, + size_t datasz, bool compat, + struct arch_elf_state *arch) +{ + return 0; +} +#else +extern int arch_parse_elf_property(u32 type, const void *data, size_t datasz, + bool compat, struct arch_elf_state *arch); +#endif + +#ifdef CONFIG_ARCH_HAVE_ELF_PROT +int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state, + bool has_interp, bool is_interp); +#else +static inline int arch_elf_adjust_prot(int prot, + const struct arch_elf_state *state, + bool has_interp, bool is_interp) +{ + return prot; +} +#endif + #endif /* _LINUX_ELF_H */ diff --git a/include/linux/frame.h b/include/linux/frame.h index 02d3ca2d9598..303cda600e56 100644 --- a/include/linux/frame.h +++ b/include/linux/frame.h @@ -15,9 +15,20 @@ static void __used __section(.discard.func_stack_frame_non_standard) \ *__func_stack_frame_non_standard_##func = func +/* + * This macro indicates that the following intra-function call is valid. + * Any non-annotated intra-function call will cause objtool to issue a warning. + */ +#define ANNOTATE_INTRA_FUNCTION_CALL \ + 999: \ + .pushsection .discard.intra_function_calls; \ + .long 999b; \ + .popsection; + #else /* !CONFIG_STACK_VALIDATION */ #define STACK_FRAME_NON_STANDARD(func) +#define ANNOTATE_INTRA_FUNCTION_CALL #endif /* CONFIG_STACK_VALIDATION */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 1434ed801b80..ef6acd2062eb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1727,7 +1727,11 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); -extern int vfs_whiteout(struct inode *, struct dentry *); + +static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry) +{ + return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); +} extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index e6c3e4c61dad..5f24fcbfbfb4 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -85,7 +85,7 @@ struct p_log { * Superblock creation fills in ->root whereas reconfiguration begins with this * already set. * - * See Documentation/filesystems/mount_api.txt + * See Documentation/filesystems/mount_api.rst */ struct fs_context { const struct fs_context_operations *ops; diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index d5ba431b5d63..ce0b5fbf239d 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -6,7 +6,7 @@ * * NOTE!!! See: * - * Documentation/filesystems/caching/backend-api.txt + * Documentation/filesystems/caching/backend-api.rst * * for a description of the cache backend interface declared here. */ @@ -454,7 +454,7 @@ static inline void fscache_object_lookup_error(struct fscache_object *object) * Set the maximum size an object is permitted to reach, implying the highest * byte that may be written. Intended to be called by the attr_changed() op. * - * See Documentation/filesystems/caching/backend-api.txt for a complete + * See Documentation/filesystems/caching/backend-api.rst for a complete * description. */ static inline diff --git a/include/linux/fscache.h b/include/linux/fscache.h index ad044c0cb1f3..a1c928fe98e7 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -6,7 +6,7 @@ * * NOTE!!! See: * - * Documentation/filesystems/caching/netfs-api.txt + * Documentation/filesystems/caching/netfs-api.rst * * for a description of the network filesystem interface declared here. */ @@ -233,7 +233,7 @@ extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_ * * Register a filesystem as desiring caching services if they're available. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -253,7 +253,7 @@ int fscache_register_netfs(struct fscache_netfs *netfs) * Indicate that a filesystem no longer desires caching services for the * moment. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -270,7 +270,7 @@ void fscache_unregister_netfs(struct fscache_netfs *netfs) * Acquire a specific cache referral tag that can be used to select a specific * cache in which to cache an index. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -288,7 +288,7 @@ struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name) * * Release a reference to a cache referral tag previously looked up. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -315,7 +315,7 @@ void fscache_release_cache_tag(struct fscache_cache_tag *tag) * that can be used to locate files. This is done by requesting a cookie for * each index in the path to the file. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -351,7 +351,7 @@ struct fscache_cookie *fscache_acquire_cookie( * provided to update the auxiliary data in the cache before the object is * disconnected. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -394,7 +394,7 @@ int fscache_check_consistency(struct fscache_cookie *cookie, * cookie. The auxiliary data on the cookie will be updated first if @aux_data * is set. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -410,7 +410,7 @@ void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data) * * Permit data-storage cache objects to be pinned in the cache. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -425,7 +425,7 @@ int fscache_pin_cookie(struct fscache_cookie *cookie) * * Permit data-storage cache objects to be unpinned from the cache. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -441,7 +441,7 @@ void fscache_unpin_cookie(struct fscache_cookie *cookie) * changed. This includes the data size. These attributes will be obtained * through the get_attr() cookie definition op. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -463,7 +463,7 @@ int fscache_attr_changed(struct fscache_cookie *cookie) * * This can be called with spinlocks held. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -479,7 +479,7 @@ void fscache_invalidate(struct fscache_cookie *cookie) * * Wait for the invalidation of an object to complete. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -498,7 +498,7 @@ void fscache_wait_on_invalidate(struct fscache_cookie *cookie) * cookie so that a write to that object within the space can always be * honoured. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -533,7 +533,7 @@ int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) * Else, if the page is unbacked, -ENODATA is returned and a block may have * been allocated in the cache. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -582,7 +582,7 @@ int fscache_read_or_alloc_page(struct fscache_cookie *cookie, * regard to different pages, the return values are prioritised in that order. * Any pages submitted for reading are removed from the pages list. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -617,7 +617,7 @@ int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, * Else, a block will be allocated if one wasn't already, and 0 will be * returned * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -667,7 +667,7 @@ void fscache_readpages_cancel(struct fscache_cookie *cookie, * be cleared at the completion of the write to indicate the success or failure * of the operation. Note that the completion may happen before the return. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -693,7 +693,7 @@ int fscache_write_page(struct fscache_cookie *cookie, * Note that this cannot cancel any outstanding I/O operations between this * page and the cache. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -711,7 +711,7 @@ void fscache_uncache_page(struct fscache_cookie *cookie, * * Ask the cache if a page is being written to the cache. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -731,7 +731,7 @@ bool fscache_check_page_write(struct fscache_cookie *cookie, * Ask the cache to wake us up when a page is no longer being written to the * cache. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index ccda97dc7f8b..0abd9a1d2852 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -2,15 +2,6 @@ #ifndef _LINUX_FTRACE_IRQ_H #define _LINUX_FTRACE_IRQ_H - -#ifdef CONFIG_FTRACE_NMI_ENTER -extern void arch_ftrace_nmi_enter(void); -extern void arch_ftrace_nmi_exit(void); -#else -static inline void arch_ftrace_nmi_enter(void) { } -static inline void arch_ftrace_nmi_exit(void) { } -#endif - #ifdef CONFIG_HWLAT_TRACER extern bool trace_hwlat_callback_enabled; extern void trace_hwlat_callback(bool enter); @@ -22,12 +13,10 @@ static inline void ftrace_nmi_enter(void) if (trace_hwlat_callback_enabled) trace_hwlat_callback(true); #endif - arch_ftrace_nmi_enter(); } static inline void ftrace_nmi_exit(void) { - arch_ftrace_nmi_exit(); #ifdef CONFIG_HWLAT_TRACER if (trace_hwlat_callback_enabled) trace_hwlat_callback(false); diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 7c8b82f69288..e07cf853aa16 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -2,31 +2,28 @@ #ifndef LINUX_HARDIRQ_H #define LINUX_HARDIRQ_H +#include <linux/context_tracking_state.h> #include <linux/preempt.h> #include <linux/lockdep.h> #include <linux/ftrace_irq.h> #include <linux/vtime.h> #include <asm/hardirq.h> - extern void synchronize_irq(unsigned int irq); extern bool synchronize_hardirq(unsigned int irq); -#if defined(CONFIG_TINY_RCU) - -static inline void rcu_nmi_enter(void) -{ -} +#ifdef CONFIG_NO_HZ_FULL +void __rcu_irq_enter_check_tick(void); +#else +static inline void __rcu_irq_enter_check_tick(void) { } +#endif -static inline void rcu_nmi_exit(void) +static __always_inline void rcu_irq_enter_check_tick(void) { + if (context_tracking_enabled()) + __rcu_irq_enter_check_tick(); } -#else -extern void rcu_nmi_enter(void); -extern void rcu_nmi_exit(void); -#endif - /* * It is safe to do non-atomic ops on ->hardirq_context, * because NMI handlers may not preempt and the ops are @@ -65,14 +62,34 @@ extern void irq_exit(void); #define arch_nmi_exit() do { } while (0) #endif +#ifdef CONFIG_TINY_RCU +static inline void rcu_nmi_enter(void) { } +static inline void rcu_nmi_exit(void) { } +#else +extern void rcu_nmi_enter(void); +extern void rcu_nmi_exit(void); +#endif + +/* + * NMI vs Tracing + * -------------- + * + * We must not land in a tracer until (or after) we've changed preempt_count + * such that in_nmi() becomes true. To that effect all NMI C entry points must + * be marked 'notrace' and call nmi_enter() as soon as possible. + */ + +/* + * nmi_enter() can nest up to 15 times; see NMI_BITS. + */ #define nmi_enter() \ do { \ arch_nmi_enter(); \ printk_nmi_enter(); \ lockdep_off(); \ ftrace_nmi_enter(); \ - BUG_ON(in_nmi()); \ - preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ + BUG_ON(in_nmi() == NMI_MASK); \ + __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ rcu_nmi_enter(); \ lockdep_hardirq_enter(); \ } while (0) @@ -82,7 +99,7 @@ extern void irq_exit(void); lockdep_hardirq_exit(); \ rcu_nmi_exit(); \ BUG_ON(!in_nmi()); \ - preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ + __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ ftrace_nmi_exit(); \ lockdep_on(); \ printk_nmi_exit(); \ diff --git a/include/linux/idr.h b/include/linux/idr.h index ac6e946b6767..3ade03e5c7af 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -171,7 +171,7 @@ static inline bool idr_is_empty(const struct idr *idr) */ static inline void idr_preload_end(void) { - preempt_enable(); + local_unlock(&radix_tree_preloads.lock); } /** diff --git a/include/linux/kobject.h b/include/linux/kobject.h index e2ca0a292e21..fc8d83e91379 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -7,7 +7,7 @@ * Copyright (c) 2006-2008 Greg Kroah-Hartman <greg@kroah.com> * Copyright (c) 2006-2008 Novell Inc. * - * Please read Documentation/kobject.txt before using the kobject + * Please read Documentation/core-api/kobject.rst before using the kobject * interface, ESPECIALLY the parts about reference counts and object * destructors. */ diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 069aa2ebef90..2b5b64256cf4 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -8,7 +8,7 @@ * * Split from kobject.h by David Howells (dhowells@redhat.com) * - * Please read Documentation/kobject.txt before using the kobject + * Please read Documentation/core-api/kobject.rst before using the kobject * interface, ESPECIALLY the parts about reference counts and object * destructors. */ diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 9280209d1f62..d796ec20d114 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -105,7 +105,7 @@ /* === DEPRECATED annotations === */ -#ifndef CONFIG_X86 +#ifndef CONFIG_ARCH_USE_SYM_ANNOTATIONS #ifndef GLOBAL /* deprecated, use SYM_DATA*, SYM_ENTRY, or similar */ #define GLOBAL(name) \ @@ -118,10 +118,10 @@ #define ENTRY(name) \ SYM_FUNC_START(name) #endif -#endif /* CONFIG_X86 */ +#endif /* CONFIG_ARCH_USE_SYM_ANNOTATIONS */ #endif /* LINKER_SCRIPT */ -#ifndef CONFIG_X86 +#ifndef CONFIG_ARCH_USE_SYM_ANNOTATIONS #ifndef WEAK /* deprecated, use SYM_FUNC_START_WEAK* */ #define WEAK(name) \ @@ -143,7 +143,7 @@ #define ENDPROC(name) \ SYM_FUNC_END(name) #endif -#endif /* CONFIG_X86 */ +#endif /* CONFIG_ARCH_USE_SYM_ANNOTATIONS */ /* === generic annotations === */ diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h new file mode 100644 index 000000000000..e55010fa7329 --- /dev/null +++ b/include/linux/local_lock.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_LOCAL_LOCK_H +#define _LINUX_LOCAL_LOCK_H + +#include <linux/local_lock_internal.h> + +/** + * local_lock_init - Runtime initialize a lock instance + */ +#define local_lock_init(lock) __local_lock_init(lock) + +/** + * local_lock - Acquire a per CPU local lock + * @lock: The lock variable + */ +#define local_lock(lock) __local_lock(lock) + +/** + * local_lock_irq - Acquire a per CPU local lock and disable interrupts + * @lock: The lock variable + */ +#define local_lock_irq(lock) __local_lock_irq(lock) + +/** + * local_lock_irqsave - Acquire a per CPU local lock, save and disable + * interrupts + * @lock: The lock variable + * @flags: Storage for interrupt flags + */ +#define local_lock_irqsave(lock, flags) \ + __local_lock_irqsave(lock, flags) + +/** + * local_unlock - Release a per CPU local lock + * @lock: The lock variable + */ +#define local_unlock(lock) __local_unlock(lock) + +/** + * local_unlock_irq - Release a per CPU local lock and enable interrupts + * @lock: The lock variable + */ +#define local_unlock_irq(lock) __local_unlock_irq(lock) + +/** + * local_unlock_irqrestore - Release a per CPU local lock and restore + * interrupt flags + * @lock: The lock variable + * @flags: Interrupt flags to restore + */ +#define local_unlock_irqrestore(lock, flags) \ + __local_unlock_irqrestore(lock, flags) + +#endif diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h new file mode 100644 index 000000000000..4a8795b21d77 --- /dev/null +++ b/include/linux/local_lock_internal.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_LOCAL_LOCK_H +# error "Do not include directly, include linux/local_lock.h" +#endif + +#include <linux/percpu-defs.h> +#include <linux/lockdep.h> + +typedef struct { +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + struct task_struct *owner; +#endif +} local_lock_t; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LL_DEP_MAP_INIT(lockname) \ + .dep_map = { \ + .name = #lockname, \ + .wait_type_inner = LD_WAIT_CONFIG, \ + } +#else +# define LL_DEP_MAP_INIT(lockname) +#endif + +#define INIT_LOCAL_LOCK(lockname) { LL_DEP_MAP_INIT(lockname) } + +#define __local_lock_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + debug_check_no_locks_freed((void *)lock, sizeof(*lock));\ + lockdep_init_map_wait(&(lock)->dep_map, #lock, &__key, 0, LD_WAIT_CONFIG);\ +} while (0) + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline void local_lock_acquire(local_lock_t *l) +{ + lock_map_acquire(&l->dep_map); + DEBUG_LOCKS_WARN_ON(l->owner); + l->owner = current; +} + +static inline void local_lock_release(local_lock_t *l) +{ + DEBUG_LOCKS_WARN_ON(l->owner != current); + l->owner = NULL; + lock_map_release(&l->dep_map); +} + +#else /* CONFIG_DEBUG_LOCK_ALLOC */ +static inline void local_lock_acquire(local_lock_t *l) { } +static inline void local_lock_release(local_lock_t *l) { } +#endif /* !CONFIG_DEBUG_LOCK_ALLOC */ + +#define __local_lock(lock) \ + do { \ + preempt_disable(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + +#define __local_lock_irq(lock) \ + do { \ + local_irq_disable(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + +#define __local_lock_irqsave(lock, flags) \ + do { \ + local_irq_save(flags); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + +#define __local_unlock(lock) \ + do { \ + local_lock_release(this_cpu_ptr(lock)); \ + preempt_enable(); \ + } while (0) + +#define __local_unlock_irq(lock) \ + do { \ + local_lock_release(this_cpu_ptr(lock)); \ + local_irq_enable(); \ + } while (0) + +#define __local_unlock_irqrestore(lock, flags) \ + do { \ + local_lock_release(this_cpu_ptr(lock)); \ + local_irq_restore(flags); \ + } while (0) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 206774ac6946..8fce5c98a4b0 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -308,8 +308,27 @@ extern void lockdep_set_selftest_task(struct task_struct *task); extern void lockdep_init_task(struct task_struct *task); -extern void lockdep_off(void); -extern void lockdep_on(void); +/* + * Split the recrursion counter in two to readily detect 'off' vs recursion. + */ +#define LOCKDEP_RECURSION_BITS 16 +#define LOCKDEP_OFF (1U << LOCKDEP_RECURSION_BITS) +#define LOCKDEP_RECURSION_MASK (LOCKDEP_OFF - 1) + +/* + * lockdep_{off,on}() are macros to avoid tracing and kprobes; not inlines due + * to header dependencies. + */ + +#define lockdep_off() \ +do { \ + current->lockdep_recursion += LOCKDEP_OFF; \ +} while (0) + +#define lockdep_on() \ +do { \ + current->lockdep_recursion -= LOCKDEP_OFF; \ +} while (0) extern void lockdep_register_key(struct lock_class_key *key); extern void lockdep_unregister_key(struct lock_class_key *key); diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 988ca0df7824..44d5422c18e4 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -77,7 +77,7 @@ * state. This is called immediately after commit_creds(). * * Security hooks for mount using fs_context. - * [See also Documentation/filesystems/mount_api.txt] + * [See also Documentation/filesystems/mount_api.rst] * * @fs_context_dup: * Allocate and attach a security structure to sc->security. This pointer diff --git a/include/linux/mm.h b/include/linux/mm.h index fda41eb7f1c8..6e6c71cdfa13 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -325,17 +325,13 @@ extern unsigned int kobjsize(const void *objp); #elif defined(CONFIG_SPARC64) # define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */ # define VM_ARCH_CLEAR VM_SPARC_ADI +#elif defined(CONFIG_ARM64) +# define VM_ARM64_BTI VM_ARCH_1 /* BTI guarded page, a.k.a. GP bit */ +# define VM_ARCH_CLEAR VM_ARM64_BTI #elif !defined(CONFIG_MMU) # define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ #endif -#if defined(CONFIG_X86_INTEL_MPX) -/* MPX specific bounds table or bounds directory */ -# define VM_MPX VM_HIGH_ARCH_4 -#else -# define VM_MPX VM_NONE -#endif - #ifndef VM_GROWSUP # define VM_GROWSUP VM_NONE #endif @@ -1230,7 +1226,7 @@ void unpin_user_pages(struct page **pages, unsigned long npages); * used to track the pincount (instead using of the GUP_PIN_COUNTING_BIAS * scheme). * - * For more information, please see Documentation/vm/pin_user_pages.rst. + * For more information, please see Documentation/core-api/pin_user_pages.rst. * * @page: pointer to page to be queried. * @Return: True, if it is likely that the page has been "dma-pinned". @@ -2874,7 +2870,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, * releasing pages: get_user_pages*() pages must be released via put_page(), * while pin_user_pages*() pages must be released via unpin_user_page(). * - * Please see Documentation/vm/pin_user_pages.rst for more information. + * Please see Documentation/core-api/pin_user_pages.rst for more information. */ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a89f47515eb1..fdd9beb5efed 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -156,6 +156,9 @@ enum zone_stat_item { NR_MLOCK, /* mlock()ed pages found and moved off LRU */ NR_PAGETABLE, /* used for pagetables */ NR_KERNEL_STACK_KB, /* measured in KiB */ +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) + NR_KERNEL_SCS_KB, /* measured in KiB */ +#endif /* Second 128 byte cacheline */ NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 4c2ddd0941a7..0754b8d71262 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -663,6 +663,7 @@ struct x86_cpu_id { __u16 vendor; __u16 family; __u16 model; + __u16 steppings; __u16 feature; /* bit index */ kernel_ulong_t driver_data; }; @@ -671,6 +672,7 @@ struct x86_cpu_id { #define X86_VENDOR_ANY 0xffff #define X86_FAMILY_ANY 0 #define X86_MODEL_ANY 0 +#define X86_STEPPING_ANY 0 #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ /* diff --git a/include/linux/module.h b/include/linux/module.h index 1ad393e62bef..d849d06e4d44 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -458,6 +458,8 @@ struct module { void __percpu *percpu; unsigned int percpu_size; #endif + void *noinstr_text_start; + unsigned int noinstr_text_size; #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; @@ -489,6 +491,12 @@ struct module { unsigned int num_ftrace_callsites; unsigned long *ftrace_callsites; #endif +#ifdef CONFIG_KPROBES + void *kprobes_text_start; + unsigned int kprobes_text_size; + unsigned long *kprobe_blacklist; + unsigned int num_kprobe_blacklist; +#endif #ifdef CONFIG_LIVEPATCH bool klp; /* Is this a livepatch module? */ diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index ca92aea8a6bd..4fa67a8b2265 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -29,6 +29,11 @@ void *module_alloc(unsigned long size); /* Free memory returned from module_alloc. */ void module_memfree(void *module_region); +/* Determines if the section name is an init section (that is only used during + * module loading). + */ +bool module_init_section(const char *name); + /* Determines if the section name is an exit section (that is only used during * module unloading) */ diff --git a/include/linux/mount.h b/include/linux/mount.h index bf8cc4108b8f..7edac8c7a9c1 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -50,7 +50,8 @@ struct fs_context; #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \ + MNT_CURSOR) #define MNT_INTERNAL 0x4000 @@ -64,6 +65,7 @@ struct fs_context; #define MNT_SYNC_UMOUNT 0x2000000 #define MNT_MARKED 0x4000000 #define MNT_UMOUNT 0x8000000 +#define MNT_CURSOR 0x10000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9c3e7619c929..d7b610c4eebd 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -61,7 +61,7 @@ struct perf_guest_info_callbacks { struct perf_callchain_entry { __u64 nr; - __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */ + __u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */ }; struct perf_callchain_entry_ctx { @@ -113,7 +113,7 @@ struct perf_raw_record { struct perf_branch_stack { __u64 nr; __u64 hw_idx; - struct perf_branch_entry entries[0]; + struct perf_branch_entry entries[]; }; struct task_struct; @@ -1305,7 +1305,7 @@ static inline int perf_is_paranoid(void) static inline int perf_allow_kernel(struct perf_event_attr *attr) { - if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) + if (sysctl_perf_event_paranoid > 1 && !perfmon_capable()) return -EACCES; return security_perf_event_open(attr, PERF_SECURITY_KERNEL); @@ -1313,7 +1313,7 @@ static inline int perf_allow_kernel(struct perf_event_attr *attr) static inline int perf_allow_cpu(struct perf_event_attr *attr) { - if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN)) + if (sysctl_perf_event_paranoid > 0 && !perfmon_capable()) return -EACCES; return security_perf_event_open(attr, PERF_SECURITY_CPU); @@ -1321,7 +1321,7 @@ static inline int perf_allow_cpu(struct perf_event_attr *attr) static inline int perf_allow_tracepoint(struct perf_event_attr *attr) { - if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN)) + if (sysctl_perf_event_paranoid > -1 && !perfmon_capable()) return -EPERM; return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); diff --git a/include/linux/platform_data/ad5761.h b/include/linux/platform_data/ad5761.h index 02bef5177ff5..69e261e2ca14 100644 --- a/include/linux/platform_data/ad5761.h +++ b/include/linux/platform_data/ad5761.h @@ -3,7 +3,7 @@ * AD5721, AD5721R, AD5761, AD5761R, Voltage Output Digital to Analog Converter * * Copyright 2016 Qtechnology A/S - * 2016 Ricardo Ribalda <ricardo.ribalda@gmail.com> + * 2016 Ricardo Ribalda <ribalda@kernel.org> */ #ifndef __LINUX_PLATFORM_DATA_AD5761_H__ #define __LINUX_PLATFORM_DATA_AD5761_H__ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index bc3f1aecaa19..7d9c1c0e149c 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -26,13 +26,13 @@ * PREEMPT_MASK: 0x000000ff * SOFTIRQ_MASK: 0x0000ff00 * HARDIRQ_MASK: 0x000f0000 - * NMI_MASK: 0x00100000 + * NMI_MASK: 0x00f00000 * PREEMPT_NEED_RESCHED: 0x80000000 */ #define PREEMPT_BITS 8 #define SOFTIRQ_BITS 8 #define HARDIRQ_BITS 4 -#define NMI_BITS 1 +#define NMI_BITS 4 #define PREEMPT_SHIFT 0 #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) diff --git a/include/linux/printk.h b/include/linux/printk.h index 07f2d08f79ff..15c8133b194f 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -279,39 +279,116 @@ static inline void printk_safe_flush_on_panic(void) extern int kptr_restrict; +/** + * pr_fmt - used by the pr_*() macros to generate the printk format string + * @fmt: format string passed from a pr_*() macro + * + * This macro can be used to generate a unified format string for pr_*() + * macros. A common use is to prefix all pr_*() messages in a file with a common + * string. For example, defining this at the top of a source file: + * + * #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + * + * would prefix all pr_info, pr_emerg... messages in the file with the module + * name. + */ #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif -/* - * These can be used to print at the various log levels. - * All of these will print unconditionally, although note that pr_debug() - * and other debug macros are compiled out unless either DEBUG is defined - * or CONFIG_DYNAMIC_DEBUG is set. +/** + * pr_emerg - Print an emergency-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_EMERG loglevel. It uses pr_fmt() to + * generate the format string. */ #define pr_emerg(fmt, ...) \ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +/** + * pr_alert - Print an alert-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_ALERT loglevel. It uses pr_fmt() to + * generate the format string. + */ #define pr_alert(fmt, ...) \ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +/** + * pr_crit - Print a critical-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_CRIT loglevel. It uses pr_fmt() to + * generate the format string. + */ #define pr_crit(fmt, ...) \ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +/** + * pr_err - Print an error-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_ERR loglevel. It uses pr_fmt() to + * generate the format string. + */ #define pr_err(fmt, ...) \ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +/** + * pr_warn - Print a warning-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_WARNING loglevel. It uses pr_fmt() + * to generate the format string. + */ #define pr_warn(fmt, ...) \ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +/** + * pr_notice - Print a notice-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_NOTICE loglevel. It uses pr_fmt() to + * generate the format string. + */ #define pr_notice(fmt, ...) \ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +/** + * pr_info - Print an info-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_INFO loglevel. It uses pr_fmt() to + * generate the format string. + */ #define pr_info(fmt, ...) \ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) -/* - * Like KERN_CONT, pr_cont() should only be used when continuing - * a line with no newline ('\n') enclosed. Otherwise it defaults - * back to KERN_DEFAULT. + +/** + * pr_cont - Continues a previous log message in the same line. + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_CONT loglevel. It should only be + * used when continuing a log message with no newline ('\n') enclosed. Otherwise + * it defaults back to KERN_DEFAULT loglevel. */ #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) -/* pr_devel() should produce zero code unless DEBUG is defined */ +/** + * pr_devel - Print a debug-level message conditionally + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_DEBUG loglevel if DEBUG is + * defined. Otherwise it does nothing. + * + * It uses pr_fmt() to generate the format string. + */ #ifdef DEBUG #define pr_devel(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) @@ -325,8 +402,19 @@ extern int kptr_restrict; #if defined(CONFIG_DYNAMIC_DEBUG) #include <linux/dynamic_debug.h> -/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */ -#define pr_debug(fmt, ...) \ +/** + * pr_debug - Print a debug-level message conditionally + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to dynamic_pr_debug() if CONFIG_DYNAMIC_DEBUG is + * set. Otherwise, if DEBUG is defined, it's equivalent to a printk with + * KERN_DEBUG loglevel. If DEBUG is not defined it does nothing. + * + * It uses pr_fmt() to generate the format string (dynamic_pr_debug() uses + * pr_fmt() internally). + */ +#define pr_debug(fmt, ...) \ dynamic_pr_debug(fmt, ##__VA_ARGS__) #elif defined(DEBUG) #define pr_debug(fmt, ...) \ diff --git a/include/linux/psci.h b/include/linux/psci.h index a67712b73b6c..14ad9b9ebcd6 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -21,11 +21,6 @@ bool psci_power_state_is_valid(u32 state); int psci_set_osi_mode(void); bool psci_has_osi_support(void); -enum smccc_version { - SMCCC_VERSION_1_0, - SMCCC_VERSION_1_1, -}; - struct psci_operations { u32 (*get_version)(void); int (*cpu_suspend)(u32 state, unsigned long entry_point); @@ -35,8 +30,6 @@ struct psci_operations { int (*affinity_info)(unsigned long target_affinity, unsigned long lowest_affinity_level); int (*migrate_info_type)(void); - enum arm_smccc_conduit conduit; - enum smccc_version smccc_version; }; extern struct psci_operations psci_ops; diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 63e62372443a..c2a9f7c90727 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -16,11 +16,20 @@ #include <linux/spinlock.h> #include <linux/types.h> #include <linux/xarray.h> +#include <linux/local_lock.h> /* Keep unconverted code working */ #define radix_tree_root xarray #define radix_tree_node xa_node +struct radix_tree_preload { + local_lock_t lock; + unsigned nr; + /* nodes->parent points to next preallocated node */ + struct radix_tree_node *nodes; +}; +DECLARE_PER_CPU(struct radix_tree_preload, radix_tree_preloads); + /* * The bottom two bits of the slot determine how the remaining bits in the * slot are interpreted: @@ -245,7 +254,7 @@ int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag); static inline void radix_tree_preload_end(void) { - preempt_enable(); + local_unlock(&radix_tree_preloads.lock); } void __rcu **idr_get_free(struct radix_tree_root *root, diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 1fd61a9af45c..d7db17996322 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -11,7 +11,7 @@ I know it's not the cleaner way, but in C (not in C++) to get performances and genericity... - See Documentation/rbtree.txt for documentation and samples. + See Documentation/core-api/rbtree.rst for documentation and samples. */ #ifndef _LINUX_RBTREE_H diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 724b0d036b57..d1c53e9d8c75 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -21,7 +21,7 @@ * rb_insert_augmented() and rb_erase_augmented() are intended to be public. * The rest are implementation details you are not expected to depend on. * - * See Documentation/rbtree.txt for documentation and samples. + * See Documentation/core-api/rbtree.rst for documentation and samples. */ struct rb_augment_callbacks { diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 8214cdc715f2..7375bb3da140 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -371,7 +371,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. - * @cond...: optional lockdep expression if called from non-RCU protection. + * @cond: optional lockdep expression if called from non-RCU protection. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as list_add_rcu() @@ -646,7 +646,7 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. - * @cond...: optional lockdep expression if called from non-RCU protection. + * @cond: optional lockdep expression if called from non-RCU protection. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 2678a37c3169..659cbfa7581a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -37,6 +37,7 @@ /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); +void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); #ifdef CONFIG_PREEMPT_RCU @@ -129,25 +130,57 @@ static inline void rcu_init_nohz(void) { } * Note a quasi-voluntary context switch for RCU-tasks's benefit. * This is a macro rather than an inline function to avoid #include hell. */ -#ifdef CONFIG_TASKS_RCU -#define rcu_tasks_qs(t) \ - do { \ - if (READ_ONCE((t)->rcu_tasks_holdout)) \ - WRITE_ONCE((t)->rcu_tasks_holdout, false); \ +#ifdef CONFIG_TASKS_RCU_GENERIC + +# ifdef CONFIG_TASKS_RCU +# define rcu_tasks_classic_qs(t, preempt) \ + do { \ + if (!(preempt) && READ_ONCE((t)->rcu_tasks_holdout)) \ + WRITE_ONCE((t)->rcu_tasks_holdout, false); \ } while (0) -#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t) void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); +# else +# define rcu_tasks_classic_qs(t, preempt) do { } while (0) +# define call_rcu_tasks call_rcu +# define synchronize_rcu_tasks synchronize_rcu +# endif + +# ifdef CONFIG_TASKS_RCU_TRACE +# define rcu_tasks_trace_qs(t) \ + do { \ + if (!likely(READ_ONCE((t)->trc_reader_checked)) && \ + !unlikely(READ_ONCE((t)->trc_reader_nesting))) { \ + smp_store_release(&(t)->trc_reader_checked, true); \ + smp_mb(); /* Readers partitioned by store. */ \ + } \ + } while (0) +# else +# define rcu_tasks_trace_qs(t) do { } while (0) +# endif + +#define rcu_tasks_qs(t, preempt) \ +do { \ + rcu_tasks_classic_qs((t), (preempt)); \ + rcu_tasks_trace_qs((t)); \ +} while (0) + +# ifdef CONFIG_TASKS_RUDE_RCU +void call_rcu_tasks_rude(struct rcu_head *head, rcu_callback_t func); +void synchronize_rcu_tasks_rude(void); +# endif + +#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) void exit_tasks_rcu_start(void); void exit_tasks_rcu_finish(void); -#else /* #ifdef CONFIG_TASKS_RCU */ -#define rcu_tasks_qs(t) do { } while (0) +#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ +#define rcu_tasks_qs(t, preempt) do { } while (0) #define rcu_note_voluntary_context_switch(t) do { } while (0) #define call_rcu_tasks call_rcu #define synchronize_rcu_tasks synchronize_rcu static inline void exit_tasks_rcu_start(void) { } static inline void exit_tasks_rcu_finish(void) { } -#endif /* #else #ifdef CONFIG_TASKS_RCU */ +#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ /** * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU @@ -158,7 +191,7 @@ static inline void exit_tasks_rcu_finish(void) { } */ #define cond_resched_tasks_rcu_qs() \ do { \ - rcu_tasks_qs(current); \ + rcu_tasks_qs(current, false); \ cond_resched(); \ } while (0) diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h new file mode 100644 index 000000000000..4c25a41f8b27 --- /dev/null +++ b/include/linux/rcupdate_trace.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Read-Copy Update mechanism for mutual exclusion, adapted for tracing. + * + * Copyright (C) 2020 Paul E. McKenney. + */ + +#ifndef __LINUX_RCUPDATE_TRACE_H +#define __LINUX_RCUPDATE_TRACE_H + +#include <linux/sched.h> +#include <linux/rcupdate.h> + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +extern struct lockdep_map rcu_trace_lock_map; + +static inline int rcu_read_lock_trace_held(void) +{ + return lock_is_held(&rcu_trace_lock_map); +} + +#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +static inline int rcu_read_lock_trace_held(void) +{ + return 1; +} + +#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +#ifdef CONFIG_TASKS_TRACE_RCU + +void rcu_read_unlock_trace_special(struct task_struct *t, int nesting); + +/** + * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section + * + * When synchronize_rcu_trace() is invoked by one task, then that task + * is guaranteed to block until all other tasks exit their read-side + * critical sections. Similarly, if call_rcu_trace() is invoked on one + * task while other tasks are within RCU read-side critical sections, + * invocation of the corresponding RCU callback is deferred until after + * the all the other tasks exit their critical sections. + * + * For more details, please see the documentation for rcu_read_lock(). + */ +static inline void rcu_read_lock_trace(void) +{ + struct task_struct *t = current; + + WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1); + if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && + t->trc_reader_special.b.need_mb) + smp_mb(); // Pairs with update-side barriers + rcu_lock_acquire(&rcu_trace_lock_map); +} + +/** + * rcu_read_unlock_trace - mark end of RCU-trace read-side critical section + * + * Pairs with a preceding call to rcu_read_lock_trace(), and nesting is + * allowed. Invoking a rcu_read_unlock_trace() when there is no matching + * rcu_read_lock_trace() is verboten, and will result in lockdep complaints. + * + * For more details, please see the documentation for rcu_read_unlock(). + */ +static inline void rcu_read_unlock_trace(void) +{ + int nesting; + struct task_struct *t = current; + + rcu_lock_release(&rcu_trace_lock_map); + nesting = READ_ONCE(t->trc_reader_nesting) - 1; + if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) { + WRITE_ONCE(t->trc_reader_nesting, nesting); + return; // We assume shallow reader nesting. + } + rcu_read_unlock_trace_special(t, nesting); +} + +void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func); +void synchronize_rcu_tasks_trace(void); +void rcu_barrier_tasks_trace(void); + +#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ + +#endif /* __LINUX_RCUPDATE_TRACE_H */ diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index c0578ba23c1a..699b938358bf 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -31,4 +31,23 @@ do { \ #define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) +/** + * synchronize_rcu_mult - Wait concurrently for multiple grace periods + * @...: List of call_rcu() functions for different grace periods to wait on + * + * This macro waits concurrently for multiple types of RCU grace periods. + * For example, synchronize_rcu_mult(call_rcu, call_rcu_tasks) would wait + * on concurrent RCU and RCU-tasks grace periods. Waiting on a given SRCU + * domain requires you to write a wrapper function for that SRCU domain's + * call_srcu() function, with this wrapper supplying the pointer to the + * corresponding srcu_struct. + * + * The first argument tells Tiny RCU's _wait_rcu_gp() not to + * bother waiting for RCU. The reason for this is because anywhere + * synchronize_rcu_mult() can be called is automatically already a full + * grace period. + */ +#define synchronize_rcu_mult(...) \ + _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) + #endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 045c28b71f4f..8512caeb7682 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -49,7 +49,7 @@ static inline void rcu_softirq_qs(void) #define rcu_note_context_switch(preempt) \ do { \ rcu_qs(); \ - rcu_tasks_qs(current); \ + rcu_tasks_qs(current, (preempt)); \ } while (0) static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) @@ -71,6 +71,8 @@ static inline void rcu_irq_enter(void) { } static inline void rcu_irq_exit_irqson(void) { } static inline void rcu_irq_enter_irqson(void) { } static inline void rcu_irq_exit(void) { } +static inline void rcu_irq_exit_preempt(void) { } +static inline void rcu_irq_exit_check_preempt(void) { } static inline void exit_rcu(void) { } static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t) { @@ -85,8 +87,10 @@ static inline void rcu_scheduler_starting(void) { } static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } +static inline bool __rcu_is_watching(void) { return true; } static inline void rcu_momentary_dyntick_idle(void) { } static inline void kfree_rcu_scheduler_running(void) { } +static inline bool rcu_gp_might_be_stalled(void) { return false; } /* Avoid RCU read-side critical sections leaking across. */ static inline void rcu_all_qs(void) { barrier(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 45f3f66bb04d..d5cc9d675987 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -39,6 +39,7 @@ void rcu_barrier(void); bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); +bool rcu_gp_might_be_stalled(void); unsigned long get_state_synchronize_rcu(void); void cond_synchronize_rcu(unsigned long oldstate); @@ -46,9 +47,16 @@ void rcu_idle_enter(void); void rcu_idle_exit(void); void rcu_irq_enter(void); void rcu_irq_exit(void); +void rcu_irq_exit_preempt(void); void rcu_irq_enter_irqson(void); void rcu_irq_exit_irqson(void); +#ifdef CONFIG_PROVE_RCU +void rcu_irq_exit_check_preempt(void); +#else +static inline void rcu_irq_exit_check_preempt(void) { } +#endif + void exit_rcu(void); void rcu_scheduler_starting(void); @@ -56,6 +64,7 @@ extern int rcu_scheduler_active __read_mostly; void rcu_end_inkernel_boot(void); bool rcu_inkernel_boot_has_ended(void); bool rcu_is_watching(void); +bool __rcu_is_watching(void); #ifndef CONFIG_PREEMPTION void rcu_all_qs(void); #endif diff --git a/include/linux/relay.h b/include/linux/relay.h index c759f96e39c1..e13a333e7c37 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -141,7 +141,7 @@ struct rchan_callbacks * cause relay_open() to create a single global buffer rather * than the default set of per-cpu buffers. * - * See Documentation/filesystems/relay.txt for more info. + * See Documentation/filesystems/relay.rst for more info. */ struct dentry *(*create_buf_file)(const char *filename, struct dentry *parent, diff --git a/include/linux/sched.h b/include/linux/sched.h index 12ef0c753284..33bb7c539246 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -613,7 +613,7 @@ union rcu_special { u8 blocked; u8 need_qs; u8 exp_hint; /* Hint for performance. */ - u8 deferred_qs; + u8 need_mb; /* Readers need smp_mb(). */ } b; /* Bits. */ u32 s; /* Set of bits. */ }; @@ -724,6 +724,14 @@ struct task_struct { struct list_head rcu_tasks_holdout_list; #endif /* #ifdef CONFIG_TASKS_RCU */ +#ifdef CONFIG_TASKS_TRACE_RCU + int trc_reader_nesting; + int trc_ipi_to_cpu; + union rcu_special trc_reader_special; + bool trc_reader_checked; + struct list_head trc_holdout_list; +#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ + struct sched_info sched_info; struct list_head tasks; @@ -1289,6 +1297,12 @@ struct task_struct { unsigned long prev_lowest_stack; #endif +#ifdef CONFIG_X86_MCE + u64 mce_addr; + u64 mce_status; + struct callback_head mce_kill_me; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. diff --git a/include/linux/scs.h b/include/linux/scs.h new file mode 100644 index 000000000000..6dec390cf154 --- /dev/null +++ b/include/linux/scs.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shadow Call Stack support. + * + * Copyright (C) 2019 Google LLC + */ + +#ifndef _LINUX_SCS_H +#define _LINUX_SCS_H + +#include <linux/gfp.h> +#include <linux/poison.h> +#include <linux/sched.h> +#include <linux/sizes.h> + +#ifdef CONFIG_SHADOW_CALL_STACK + +/* + * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit + * architecture) provided ~40% safety margin on stack usage while keeping + * memory allocation overhead reasonable. + */ +#define SCS_SIZE SZ_1K +#define GFP_SCS (GFP_KERNEL | __GFP_ZERO) + +/* An illegal pointer value to mark the end of the shadow stack. */ +#define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA) + +/* Allocate a static per-CPU shadow stack */ +#define DEFINE_SCS(name) \ + DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \ + +#define task_scs(tsk) (task_thread_info(tsk)->scs_base) +#define task_scs_sp(tsk) (task_thread_info(tsk)->scs_sp) + +void scs_init(void); +int scs_prepare(struct task_struct *tsk, int node); +void scs_release(struct task_struct *tsk); + +static inline void scs_task_reset(struct task_struct *tsk) +{ + /* + * Reset the shadow stack to the base address in case the task + * is reused. + */ + task_scs_sp(tsk) = task_scs(tsk); +} + +static inline unsigned long *__scs_magic(void *s) +{ + return (unsigned long *)(s + SCS_SIZE) - 1; +} + +static inline bool task_scs_end_corrupted(struct task_struct *tsk) +{ + unsigned long *magic = __scs_magic(task_scs(tsk)); + unsigned long sz = task_scs_sp(tsk) - task_scs(tsk); + + return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC; +} + +#else /* CONFIG_SHADOW_CALL_STACK */ + +static inline void scs_init(void) {} +static inline void scs_task_reset(struct task_struct *tsk) {} +static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; } +static inline void scs_release(struct task_struct *tsk) {} +static inline bool task_scs_end_corrupted(struct task_struct *tsk) { return false; } + +#endif /* CONFIG_SHADOW_CALL_STACK */ + +#endif /* _LINUX_SCS_H */ diff --git a/include/linux/signal.h b/include/linux/signal.h index 05bacd2ab135..6bb1a3f0258c 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -24,6 +24,14 @@ static inline void clear_siginfo(kernel_siginfo_t *info) #define SI_EXPANSION_SIZE (sizeof(struct siginfo) - sizeof(struct kernel_siginfo)) +static inline void copy_siginfo_to_external(siginfo_t *to, + const kernel_siginfo_t *from) +{ + memcpy(to, from, sizeof(*from)); + memset(((char *)to) + sizeof(struct kernel_siginfo), 0, + SI_EXPANSION_SIZE); +} + int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from); int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from); diff --git a/include/linux/smp.h b/include/linux/smp.h index cbc9162689d0..04019872c7bc 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -227,8 +227,8 @@ static inline int get_boot_cpu_id(void) */ extern void arch_disable_smp_support(void); -extern void arch_enable_nonboot_cpus_begin(void); -extern void arch_enable_nonboot_cpus_end(void); +extern void arch_thaw_secondary_cpus_begin(void); +extern void arch_thaw_secondary_cpus_end(void); void smp_setup_processor_id(void); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 38286de779e3..aac57b5b7c21 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -394,6 +394,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * for example doing DMA mapping. Called from threaded * context. * @transfer_one: transfer a single spi_transfer. + * * - return 0 if the transfer is finished, * - return 1 if the transfer is still in progress. When * the driver is finished with this transfer it must diff --git a/include/linux/stat.h b/include/linux/stat.h index 528c4baad091..56614af83d4a 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -47,6 +47,7 @@ struct kstat { struct timespec64 ctime; struct timespec64 btime; /* File creation time */ u64 blocks; + u64 mnt_id; }; #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index 68ef7638311f..e92176fc8824 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -343,6 +343,7 @@ extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); +extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 1815065d52f3..7c354c2955f5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -428,6 +428,8 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); #endif asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); +asmlinkage long sys_faccessat2(int dfd, const char __user *filename, int mode, + int flags); asmlinkage long sys_chdir(const char __user *filename); asmlinkage long sys_fchdir(unsigned int fd); asmlinkage long sys_chroot(const char __user *filename); @@ -1333,11 +1335,11 @@ static inline int ksys_chmod(const char __user *filename, umode_t mode) return do_fchmodat(AT_FDCWD, filename, mode); } -extern long do_faccessat(int dfd, const char __user *filename, int mode); +long do_faccessat(int dfd, const char __user *filename, int mode, int flags); static inline long ksys_access(const char __user *filename, int mode) { - return do_faccessat(AT_FDCWD, filename, mode); + return do_faccessat(AT_FDCWD, filename, mode, 0); } extern int do_fchownat(int dfd, const char __user *filename, uid_t user, diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 80bb865b3a33..86067dbe7745 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -7,7 +7,7 @@ * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo <teheo@suse.de> * - * Please see Documentation/filesystems/sysfs.txt for more information. + * Please see Documentation/filesystems/sysfs.rst for more information. */ #ifndef _SYSFS_H_ diff --git a/include/linux/tboot.h b/include/linux/tboot.h index 5424bc6feac8..c7e424766360 100644 --- a/include/linux/tboot.h +++ b/include/linux/tboot.h @@ -121,13 +121,7 @@ struct tboot { #define TBOOT_UUID {0xff, 0x8d, 0x3c, 0x66, 0xb3, 0xe8, 0x82, 0x4b, 0xbf,\ 0xaa, 0x19, 0xea, 0x4d, 0x5, 0x7a, 0x8} -extern struct tboot *tboot; - -static inline int tboot_enabled(void) -{ - return tboot != NULL; -} - +bool tboot_enabled(void); extern void tboot_probe(void); extern void tboot_shutdown(u32 shutdown_type); extern struct acpi_table_header *tboot_get_dmar_table( diff --git a/include/linux/torture.h b/include/linux/torture.h index 6241f59e2d6f..629b66e6c161 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -89,7 +89,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp); #ifdef CONFIG_PREEMPTION #define torture_preempt_schedule() preempt_schedule() #else -#define torture_preempt_schedule() +#define torture_preempt_schedule() do { } while (0) #endif #endif /* __LINUX_TORTURE_H */ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 67f016010aad..9861c89f93be 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -378,6 +378,14 @@ extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count); static inline unsigned long user_access_save(void) { return 0UL; } static inline void user_access_restore(unsigned long flags) { } #endif +#ifndef user_write_access_begin +#define user_write_access_begin user_access_begin +#define user_write_access_end user_access_end +#endif +#ifndef user_read_access_begin +#define user_read_access_begin user_access_begin +#define user_read_access_end user_access_end +#endif #ifdef CONFIG_HARDENED_USERCOPY void usercopy_warn(const char *name, const char *detail, bool to_user, diff --git a/include/linux/wait.h b/include/linux/wait.h index feeb6be5cad6..898c890fc153 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -1149,4 +1149,6 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i (wait)->flags = 0; \ } while (0) +bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg); + #endif /* _LINUX_WAIT_H */ diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 417d9f37077a..1464ce6ffa31 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -37,15 +37,15 @@ struct watchdog_governor; * * The watchdog_ops structure contains a list of low-level operations * that control a watchdog device. It also contains the module that owns - * these operations. The start and stop function are mandatory, all other + * these operations. The start function is mandatory, all other * functions are optional. */ struct watchdog_ops { struct module *owner; /* mandatory operations */ int (*start)(struct watchdog_device *); - int (*stop)(struct watchdog_device *); /* optional operations */ + int (*stop)(struct watchdog_device *); int (*ping)(struct watchdog_device *); unsigned int (*status)(struct watchdog_device *); int (*set_timeout)(struct watchdog_device *, unsigned int); diff --git a/include/media/cec-notifier.h b/include/media/cec-notifier.h index 38956969fd12..b1c839734124 100644 --- a/include/media/cec-notifier.h +++ b/include/media/cec-notifier.h @@ -2,7 +2,7 @@ /* * cec-notifier.h - notify CEC drivers of physical address changes * - * Copyright 2016 Russell King <rmk+kernel@arm.linux.org.uk> + * Copyright 2016 Russell King. * Copyright 2016-2017 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ diff --git a/include/net/checksum.h b/include/net/checksum.h index 97bf4885a962..46754ba9d7b7 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -26,13 +26,9 @@ static inline __wsum csum_and_copy_from_user (const void __user *src, void *dst, int len, __wsum sum, int *err_ptr) { - if (access_ok(src, len)) - return csum_partial_copy_from_user(src, dst, len, sum, err_ptr); - - if (len) + if (copy_from_user(dst, src, len)) *err_ptr = -EFAULT; - - return sum; + return csum_partial(dst, len, sum); } #endif @@ -42,10 +38,8 @@ static __inline__ __wsum csum_and_copy_to_user { sum = csum_partial(src, len, sum); - if (access_ok(dst, len)) { - if (copy_to_user(dst, src, len) == 0) - return sum; - } + if (copy_to_user(dst, src, len) == 0) + return sum; if (len) *err_ptr = -EFAULT; diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 3a3201e4618e..f4a01305d9a6 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -855,9 +855,11 @@ __SYSCALL(__NR_clone3, sys_clone3) __SYSCALL(__NR_openat2, sys_openat2) #define __NR_pidfd_getfd 438 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) +#define __NR_faccessat2 439 +__SYSCALL(__NR_faccessat2, sys_faccessat2) #undef __NR_syscalls -#define __NR_syscalls 439 +#define __NR_syscalls 440 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 272dc69fa080..e58c9636741b 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -367,8 +367,14 @@ struct vfs_ns_cap_data { #define CAP_AUDIT_READ 37 +/* + * Allow system performance and observability privileged operations + * using perf_events, i915_perf and other kernel subsystems + */ + +#define CAP_PERFMON 38 -#define CAP_LAST_CAP CAP_AUDIT_READ +#define CAP_LAST_CAP CAP_PERFMON #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 34c02e4290fe..c6dd0215482e 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -36,6 +36,7 @@ typedef __s64 Elf64_Sxword; #define PT_LOPROC 0x70000000 #define PT_HIPROC 0x7fffffff #define PT_GNU_EH_FRAME 0x6474e550 +#define PT_GNU_PROPERTY 0x6474e553 #define PT_GNU_STACK (PT_LOOS + 0x474e551) @@ -367,6 +368,7 @@ typedef struct elf64_shdr { * Notes used in ET_CORE. Architectures export some of the arch register sets * using the corresponding note types via the PTRACE_GETREGSET and * PTRACE_SETREGSET requests. + * The note name for all these is "LINUX". */ #define NT_PRSTATUS 1 #define NT_PRFPREG 2 @@ -429,6 +431,9 @@ typedef struct elf64_shdr { #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ +/* Note types with note name "GNU" */ +#define NT_GNU_PROPERTY_TYPE_0 5 + /* Note header in a PT_NOTE section */ typedef struct elf32_note { Elf32_Word n_namesz; /* Name size */ @@ -443,4 +448,10 @@ typedef struct elf64_note { Elf64_Word n_type; /* Content type */ } Elf64_Nhdr; +/* .note.gnu.property types for EM_AARCH64: */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) + #endif /* _UAPI_LINUX_ELF_H */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 7fde76366ba4..1711e57f7848 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -2,7 +2,7 @@ /* * include/uapi/linux/ethtool_netlink.h - netlink interface for ethtool * - * See Documentation/networking/ethtool-netlink.txt in kernel source tree for + * See Documentation/networking/ethtool-netlink.rst in kernel source tree for * doucumentation of the interface. */ diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index ca88b7bce553..2f86b2ad6d7e 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -84,10 +84,20 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +/* + * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is + * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to + * unlinkat. The two functions do completely different things and therefore, + * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to + * faccessat would be undefined behavior and thus treating it equivalent to + * AT_EACCESS is valid undefined behavior. + */ #define AT_FDCWD -100 /* Special value used to indicate openat should use the current working directory. */ #define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#define AT_EACCESS 0x200 /* Test access permitted for + effective IDs, not real IDs. */ #define AT_REMOVEDIR 0x200 /* Remove directory instead of unlinking file. */ #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ diff --git a/include/uapi/linux/firewire-cdev.h b/include/uapi/linux/firewire-cdev.h index 1acd2b179aef..7e5b5c10a49c 100644 --- a/include/uapi/linux/firewire-cdev.h +++ b/include/uapi/linux/firewire-cdev.h @@ -308,7 +308,7 @@ struct fw_cdev_event_iso_interrupt_mc { /** * struct fw_cdev_event_iso_resource - Iso resources were allocated or freed * @closure: See &fw_cdev_event_common; - * set by %FW_CDEV_IOC_(DE)ALLOCATE_ISO_RESOURCE(_ONCE) ioctl + * set by``FW_CDEV_IOC_(DE)ALLOCATE_ISO_RESOURCE(_ONCE)`` ioctl * @type: %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or * %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED * @handle: Reference by which an allocated resource can be deallocated diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 428c7dde6b4b..fdd632c833b4 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -116,7 +116,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. - * For ARM: See Documentation/virt/kvm/api.txt + * For ARM: See Documentation/virt/kvm/api.rst */ union { __u32 irq; @@ -1107,7 +1107,7 @@ struct kvm_xen_hvm_config { * * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies * the irqfd to operate in resampling mode for level triggered interrupt - * emulation. See Documentation/virt/kvm/api.txt. + * emulation. See Documentation/virt/kvm/api.rst. */ #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index ad80a5c885d5..6df9348bb277 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -123,7 +123,10 @@ struct statx { __u32 stx_dev_major; /* ID of device containing file [uncond] */ __u32 stx_dev_minor; /* 0x90 */ - __u64 __spare2[14]; /* Spare space for future expansion */ + __u64 stx_mnt_id; + __u64 __spare2; + /* 0xa0 */ + __u64 __spare3[12]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -148,9 +151,19 @@ struct statx { #define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ -#define STATX_ALL 0x00000fffU /* All currently supported flags */ +#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ + #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ +#ifndef __KERNEL__ +/* + * This is deprecated, and shall remain the same value in the future. To avoid + * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME) + * instead. + */ +#define STATX_ALL 0x00000fffU +#endif + /* * Attributes to be found in stx_attributes and masked in stx_attributes_mask. * @@ -168,6 +181,7 @@ struct statx { #define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ #define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ +#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h index 7b1ec806f8f9..38ab7accb7be 100644 --- a/include/uapi/rdma/rdma_user_ioctl_cmds.h +++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h @@ -36,7 +36,7 @@ #include <linux/types.h> #include <linux/ioctl.h> -/* Documentation/ioctl/ioctl-number.rst */ +/* Documentation/userspace-api/ioctl/ioctl-number.rst */ #define RDMA_IOCTL_MAGIC 0x1b #define RDMA_VERBS_IOCTL \ _IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr) |