From 8ba9d40b6b2bf62377fd6fce25e9997e42b0317a Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 6 May 2021 18:02:36 -0700 Subject: kernel/async.c: fix pr_debug statement An async_func_t returns void - any errors encountered it has to stash somewhere for consumers to discover later. Link: https://lkml.kernel.org/r/20210226124355.2503524-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/async.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/async.c b/kernel/async.c index 33258e6e20f8..45a867b8644a 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -124,7 +124,7 @@ static void async_run_entry_fn(struct work_struct *work) if (initcall_debug && system_state < SYSTEM_RUNNING) { rettime = ktime_get(); delta = ktime_sub(rettime, calltime); - pr_debug("initcall %lli_%pS returned 0 after %lld usecs\n", + pr_debug("initcall %lli_%pS returned after %lld usecs\n", (long long)entry->cookie, entry->func, (long long)ktime_to_ns(delta) >> 10); -- cgit v1.2.3 From 32c93976ac2ee7ecb4b09cc032efe1445d37bd7e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 6 May 2021 18:02:39 -0700 Subject: kernel/cred.c: make init_groups static init_groups is declared in both cred.h and init_task.h, but it is not actually referenced anywhere outside of cred.c where it is defined. So make it static and remove the declarations. 
Link: https://lkml.kernel.org/r/20210310220102.2484201-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cred.h | 1 - include/linux/init_task.h | 1 - kernel/cred.c | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/cred.h b/include/linux/cred.h index ac0e5f97d7d8..14971322e1a0 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -53,7 +53,6 @@ do { \ groups_free(group_info); \ } while (0) -extern struct group_info init_groups; #ifdef CONFIG_MULTIUSER extern struct group_info *groups_alloc(int); extern void groups_free(struct group_info *); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index b2412b4d4c20..40fc5813cf93 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,7 +25,6 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; extern struct nsproxy init_nsproxy; -extern struct group_info init_groups; extern struct cred init_cred; #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE diff --git a/kernel/cred.c b/kernel/cred.c index 421b1149c651..e1d274cd741b 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -33,7 +33,7 @@ do { \ static struct kmem_cache *cred_jar; /* init to 2 - one for init_task, one to ensure it is never freed */ -struct group_info init_groups = { .usage = ATOMIC_INIT(2) }; +static struct group_info init_groups = { .usage = ATOMIC_INIT(2) }; /* * The initial credentials for the initial task -- cgit v1.2.3 From 5449162ac001a926ad8884882b071601df5edb44 Mon Sep 17 00:00:00 2001 From: Jim Newsome Date: Thu, 6 May 2021 18:04:22 -0700 Subject: do_wait: make PIDTYPE_PID case O(1) instead of O(n) Add a special-case when waiting on a pid (via waitpid, waitid, wait4, etc) to avoid doing an O(n) scan of children and tracees, and instead do an O(1) lookup. 
This improves performance when waiting on a pid from a thread group with many children and/or tracees. Time to fork and then call waitpid on the child, from a task that already has N children [1]: N | Before | After -----|---------|------ 1 | 74 us | 74 us 20 | 72 us | 75 us 100 | 83 us | 77 us 500 | 99 us | 74 us 1000 | 179 us | 75 us 5000 | 804 us | 79 us 8000 | 1268 us | 78 us [1]: https://lkml.org/lkml/2021/3/12/1567 This can make a substantial performance improvement for applications with a thread that has many children or tracees and frequently needs to wait on them. Tools that use ptrace to intercept syscalls for a large number of processes are likely to fall into this category. In particular this patch was developed while building a ptrace-based second generation of the Shadow emulator [2], for which it allows us to avoid quadratic scaling (without having to use a workaround that introduces a ~40% performance penalty) [3]. Other examples of tools that fall into this category which this patch may help include User Mode Linux [4] and DetTrace [5]. [2]: https://shadow.github.io/ [3]: https://github.com/shadow/shadow/issues/1134#issuecomment-798992292 [4]: https://en.wikipedia.org/wiki/User-mode_Linux [5]: https://github.com/dettrace/dettrace Link: https://lkml.kernel.org/r/20210314231544.9379-1-jnewsome@torproject.org Signed-off-by: James Newsome Reviewed-by: Oleg Nesterov Cc: "Eric W . 
Biederman" Cc: Christian Brauner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 0596526ed9ea..fd1c04193e18 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1440,9 +1440,48 @@ void __wake_up_parent(struct task_struct *p, struct task_struct *parent) TASK_INTERRUPTIBLE, p); } +static bool is_effectively_child(struct wait_opts *wo, bool ptrace, + struct task_struct *target) +{ + struct task_struct *parent = + !ptrace ? target->real_parent : target->parent; + + return current == parent || (!(wo->wo_flags & __WNOTHREAD) && + same_thread_group(current, parent)); +} + +/* + * Optimization for waiting on PIDTYPE_PID. No need to iterate through child + * and tracee lists to find the target task. + */ +static int do_wait_pid(struct wait_opts *wo) +{ + bool ptrace; + struct task_struct *target; + int retval; + + ptrace = false; + target = pid_task(wo->wo_pid, PIDTYPE_TGID); + if (target && is_effectively_child(wo, ptrace, target)) { + retval = wait_consider_task(wo, ptrace, target); + if (retval) + return retval; + } + + ptrace = true; + target = pid_task(wo->wo_pid, PIDTYPE_PID); + if (target && target->ptrace && + is_effectively_child(wo, ptrace, target)) { + retval = wait_consider_task(wo, ptrace, target); + if (retval) + return retval; + } + + return 0; +} + static long do_wait(struct wait_opts *wo) { - struct task_struct *tsk; int retval; trace_sched_process_wait(wo->wo_pid); @@ -1464,19 +1503,27 @@ repeat: set_current_state(TASK_INTERRUPTIBLE); read_lock(&tasklist_lock); - tsk = current; - do { - retval = do_wait_thread(wo, tsk); - if (retval) - goto end; - retval = ptrace_do_wait(wo, tsk); + if (wo->wo_type == PIDTYPE_PID) { + retval = do_wait_pid(wo); if (retval) goto end; + } else { + struct task_struct *tsk = current; + + do { + retval = 
do_wait_thread(wo, tsk); + if (retval) + goto end; - if (wo->wo_flags & __WNOTHREAD) - break; - } while_each_thread(current, tsk); + retval = ptrace_do_wait(wo, tsk); + if (retval) + goto end; + + if (wo->wo_flags & __WNOTHREAD) + break; + } while_each_thread(current, tsk); + } read_unlock(&tasklist_lock); notask: -- cgit v1.2.3 From a6895399380ab58d9efd0a0bec2fcb98d77e20bd Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Thu, 6 May 2021 18:04:25 -0700 Subject: kernel/fork.c: simplify copy_mm() All this can happen without a single goto. Link: https://lkml.kernel.org/r/2072685.XptgVkyDqn@devpool47 Signed-off-by: Rolf Eike Beer Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 771e0ea90499..784f7ca7c17e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1396,7 +1396,6 @@ fail_nomem: static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) { struct mm_struct *mm, *oldmm; - int retval; tsk->min_flt = tsk->maj_flt = 0; tsk->nvcsw = tsk->nivcsw = 0; @@ -1423,21 +1422,15 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) if (clone_flags & CLONE_VM) { mmget(oldmm); mm = oldmm; - goto good_mm; + } else { + mm = dup_mm(tsk, current->mm); + if (!mm) + return -ENOMEM; } - retval = -ENOMEM; - mm = dup_mm(tsk, current->mm); - if (!mm) - goto fail_nomem; - -good_mm: tsk->mm = mm; tsk->active_mm = mm; return 0; - -fail_nomem: - return retval; } static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) -- cgit v1.2.3 From a8ca6b1388a91c79dad257a7cc0bc14c009312fe Mon Sep 17 00:00:00 2001 From: Xiaofeng Cao Date: Thu, 6 May 2021 18:04:28 -0700 Subject: kernel/fork.c: fix typos change 'ancestoral' to 'ancestral' change 'reuseable' to 'reusable' delete 'do' grammatically Link: 
https://lkml.kernel.org/r/20210317082031.11692-1-caoxiaofeng@yulong.com Signed-off-by: Xiaofeng Cao Reviewed-by: Christian Brauner Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 784f7ca7c17e..dc06afd725cb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1145,7 +1145,7 @@ void mmput_async(struct mm_struct *mm) * invocations: in mmput() nobody alive left, in execve task is single * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the * mm->exe_file, but does so without using set_mm_exe_file() in order - * to do avoid the need for any locks. + * to avoid the need for any locks. */ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) { @@ -1736,7 +1736,7 @@ static int pidfd_release(struct inode *inode, struct file *file) * /proc/<pid>/status where Pid and NSpid are always shown relative to * the pid namespace of the procfs instance. The difference becomes * obvious when sending around a pidfd between pid namespaces from a - * different branch of the tree, i.e. where no ancestoral relation is + * different branch of the tree, i.e. 
where no ancestral relation is * present between the pid namespaces: * - create two new pid namespaces ns1 and ns2 in the initial pid * namespace (also take care to create new mount namespaces in the @@ -2728,8 +2728,8 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs) return false; /* - * - make the CLONE_DETACHED bit reuseable for clone3 - * - make the CSIGNAL bits reuseable for clone3 + * - make the CLONE_DETACHED bit reusable for clone3 + * - make the CSIGNAL bits reusable for clone3 */ if (kargs->flags & (CLONE_DETACHED | CSIGNAL)) return false; -- cgit v1.2.3 From a119b4e5186c283ee13850b65004de6d746a81be Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Thu, 6 May 2021 18:04:35 -0700 Subject: kexec: Add kexec reboot string The purpose is to notify the kernel module for fast reboot. Upstream a patch from the SONiC network operating system [1]. [1]: https://github.com/Azure/sonic-linux-kernel/pull/46 Link: https://lkml.kernel.org/r/20210304124626.13927-1-pmenzel@molgen.mpg.de Signed-off-by: Joe LeVeque Signed-off-by: Paul Menzel Acked-by: Baoquan He Cc: Guohan Lu Cc: Joe LeVeque Cc: Paul Menzel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index a0b6780740c8..f04d04d1b855 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1165,7 +1165,7 @@ int kernel_kexec(void) #endif { kexec_in_progress = true; - kernel_restart_prepare(NULL); + kernel_restart_prepare("kexec reboot"); migrate_to_reboot_cpu(); /* -- cgit v1.2.3 From 31d82c2c787d5cf65fedd35ebbc0c1bd95c1a679 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Thu, 6 May 2021 18:04:38 -0700 Subject: kernel: kexec_file: fix error return code of kexec_calculate_store_digests() When vzalloc() returns NULL to sha_regions, no error return code of kexec_calculate_store_digests() is assigned. 
To fix this bug, ret is assigned with -ENOMEM in this case. Link: https://lkml.kernel.org/r/20210309083904.24321-1-baijiaju1990@gmail.com Fixes: a43cac0d9dc2 ("kexec: split kexec_file syscall code to kexec_file.c") Signed-off-by: Jia-Ju Bai Reported-by: TOTE Robot Acked-by: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec_file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 5c3447cf7ad5..33400ff051a8 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -740,8 +740,10 @@ static int kexec_calculate_store_digests(struct kimage *image) sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region); sha_regions = vzalloc(sha_region_sz); - if (!sha_regions) + if (!sha_regions) { + ret = -ENOMEM; goto out_free_desc; + } desc->tfm = tfm; -- cgit v1.2.3 From b2075dbb15d7ae952aeb01331198f4dc45a7e46a Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 6 May 2021 18:04:41 -0700 Subject: kexec: dump kmessage before machine_kexec kmsg_dump(KMSG_DUMP_SHUTDOWN) is called before machine_restart(), machine_halt(), and machine_power_off(). The only one that is missing is machine_kexec(). The dmesg output that it contains can be used to study the shutdown performance of both kernel and systemd during kexec reboot. Here is example of dmesg data collected after kexec: root@dplat-cp22:~# cat /sys/fs/pstore/dmesg-ramoops-0 | tail ... 
[ 70.914592] psci: CPU3 killed (polled 0 ms) [ 70.915705] CPU4: shutdown [ 70.916643] psci: CPU4 killed (polled 4 ms) [ 70.917715] CPU5: shutdown [ 70.918725] psci: CPU5 killed (polled 0 ms) [ 70.919704] CPU6: shutdown [ 70.920726] psci: CPU6 killed (polled 4 ms) [ 70.921642] CPU7: shutdown [ 70.922650] psci: CPU7 killed (polled 0 ms) Link: https://lkml.kernel.org/r/20210319192326.146000-2-pasha.tatashin@soleen.com Signed-off-by: Pavel Tatashin Reviewed-by: Kees Cook Reviewed-by: Petr Mladek Reviewed-by: Bhupesh Sharma Acked-by: Baoquan He Reviewed-by: Tyler Hicks Cc: James Morris Cc: Sasha Levin Cc: Eric W. Biederman Cc: Anton Vorontsov Cc: Colin Cross Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index f04d04d1b855..f099baee3578 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1179,6 +1180,7 @@ int kernel_kexec(void) machine_shutdown(); } + kmsg_dump(KMSG_DUMP_SHUTDOWN); machine_kexec(kexec_image); #ifdef CONFIG_KEXEC_JUMP -- cgit v1.2.3 From 7a1d55b987dfcbddecdb67eecc76fe555d4348ba Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 6 May 2021 18:04:45 -0700 Subject: gcov: combine common code There's a lot of duplicated code between gcc and clang implementations, move it over to fs.c to simplify the code, there's no reason to believe that for small data like this one would not just implement the simple convert_to_gcda() function. 
Link: https://lkml.kernel.org/r/20210315235453.e3fbb86e99a0.I08a3ee6dbe47ea3e8024956083f162884a958e40@changeid Signed-off-by: Johannes Berg Acked-by: Peter Oberparleiter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/gcov/base.c | 49 +++++++++++++++ kernel/gcov/clang.c | 167 +------------------------------------------------- kernel/gcov/fs.c | 116 +++++++++++++++++++++++++++++++++++ kernel/gcov/gcc_4_7.c | 167 +------------------------------------------------- kernel/gcov/gcov.h | 14 ++--- 5 files changed, 171 insertions(+), 342 deletions(-) (limited to 'kernel') diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index 0ffe9f194080..073a3738c5e6 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -49,6 +49,55 @@ void gcov_enable_events(void) mutex_unlock(&gcov_lock); } +/** + * store_gcov_u32 - store 32 bit number in gcov format to buffer + * @buffer: target buffer or NULL + * @off: offset into the buffer + * @v: value to be stored + * + * Number format defined by gcc: numbers are recorded in the 32 bit + * unsigned binary form of the endianness of the machine generating the + * file. Returns the number of bytes stored. If @buffer is %NULL, doesn't + * store anything. + */ +size_t store_gcov_u32(void *buffer, size_t off, u32 v) +{ + u32 *data; + + if (buffer) { + data = buffer + off; + *data = v; + } + + return sizeof(*data); +} + +/** + * store_gcov_u64 - store 64 bit number in gcov format to buffer + * @buffer: target buffer or NULL + * @off: offset into the buffer + * @v: value to be stored + * + * Number format defined by gcc: numbers are recorded in the 32 bit + * unsigned binary form of the endianness of the machine generating the + * file. 64 bit numbers are stored as two 32 bit numbers, the low part + * first. Returns the number of bytes stored. If @buffer is %NULL, doesn't store + * anything. 
+ */ +size_t store_gcov_u64(void *buffer, size_t off, u64 v) +{ + u32 *data; + + if (buffer) { + data = buffer + off; + + data[0] = (v & 0xffffffffUL); + data[1] = (v >> 32); + } + + return sizeof(*data) * 2; +} + #ifdef CONFIG_MODULES /* Update list and generate events when modules are unloaded. */ static int gcov_module_notifier(struct notifier_block *nb, unsigned long event, diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index c466c7fbdece..1741c65b4fb2 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include "gcov.h" @@ -449,71 +448,6 @@ void gcov_info_free(struct gcov_info *info) } #endif -#define ITER_STRIDE PAGE_SIZE - -/** - * struct gcov_iterator - specifies current file position in logical records - * @info: associated profiling data - * @buffer: buffer containing file data - * @size: size of buffer - * @pos: current position in file - */ -struct gcov_iterator { - struct gcov_info *info; - void *buffer; - size_t size; - loff_t pos; -}; - -/** - * store_gcov_u32 - store 32 bit number in gcov format to buffer - * @buffer: target buffer or NULL - * @off: offset into the buffer - * @v: value to be stored - * - * Number format defined by gcc: numbers are recorded in the 32 bit - * unsigned binary form of the endianness of the machine generating the - * file. Returns the number of bytes stored. If @buffer is %NULL, doesn't - * store anything. - */ -static size_t store_gcov_u32(void *buffer, size_t off, u32 v) -{ - u32 *data; - - if (buffer) { - data = buffer + off; - *data = v; - } - - return sizeof(*data); -} - -/** - * store_gcov_u64 - store 64 bit number in gcov format to buffer - * @buffer: target buffer or NULL - * @off: offset into the buffer - * @v: value to be stored - * - * Number format defined by gcc: numbers are recorded in the 32 bit - * unsigned binary form of the endianness of the machine generating the - * file. 
64 bit numbers are stored as two 32 bit numbers, the low part - * first. Returns the number of bytes stored. If @buffer is %NULL, doesn't store - * anything. - */ -static size_t store_gcov_u64(void *buffer, size_t off, u64 v) -{ - u32 *data; - - if (buffer) { - data = buffer + off; - - data[0] = (v & 0xffffffffUL); - data[1] = (v >> 32); - } - - return sizeof(*data) * 2; -} - /** * convert_to_gcda - convert profiling data set to gcda file format * @buffer: the buffer to store file data or %NULL if no data should be stored @@ -521,7 +455,7 @@ static size_t store_gcov_u64(void *buffer, size_t off, u64 v) * * Returns the number of bytes that were/would have been stored into the buffer. */ -static size_t convert_to_gcda(char *buffer, struct gcov_info *info) +size_t convert_to_gcda(char *buffer, struct gcov_info *info) { struct gcov_fn_info *fi_ptr; size_t pos = 0; @@ -558,102 +492,3 @@ static size_t convert_to_gcda(char *buffer, struct gcov_info *info) return pos; } - -/** - * gcov_iter_new - allocate and initialize profiling data iterator - * @info: profiling data set to be iterated - * - * Return file iterator on success, %NULL otherwise. - */ -struct gcov_iterator *gcov_iter_new(struct gcov_info *info) -{ - struct gcov_iterator *iter; - - iter = kzalloc(sizeof(struct gcov_iterator), GFP_KERNEL); - if (!iter) - goto err_free; - - iter->info = info; - /* Dry-run to get the actual buffer size. 
*/ - iter->size = convert_to_gcda(NULL, info); - iter->buffer = vmalloc(iter->size); - if (!iter->buffer) - goto err_free; - - convert_to_gcda(iter->buffer, info); - - return iter; - -err_free: - kfree(iter); - return NULL; -} - - -/** - * gcov_iter_get_info - return profiling data set for given file iterator - * @iter: file iterator - */ -void gcov_iter_free(struct gcov_iterator *iter) -{ - vfree(iter->buffer); - kfree(iter); -} - -/** - * gcov_iter_get_info - return profiling data set for given file iterator - * @iter: file iterator - */ -struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter) -{ - return iter->info; -} - -/** - * gcov_iter_start - reset file iterator to starting position - * @iter: file iterator - */ -void gcov_iter_start(struct gcov_iterator *iter) -{ - iter->pos = 0; -} - -/** - * gcov_iter_next - advance file iterator to next logical record - * @iter: file iterator - * - * Return zero if new position is valid, non-zero if iterator has reached end. - */ -int gcov_iter_next(struct gcov_iterator *iter) -{ - if (iter->pos < iter->size) - iter->pos += ITER_STRIDE; - - if (iter->pos >= iter->size) - return -EINVAL; - - return 0; -} - -/** - * gcov_iter_write - write data for current pos to seq_file - * @iter: file iterator - * @seq: seq_file handle - * - * Return zero on success, non-zero otherwise. 
- */ -int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq) -{ - size_t len; - - if (iter->pos >= iter->size) - return -EINVAL; - - len = ITER_STRIDE; - if (iter->pos + len > iter->size) - len = iter->size - iter->pos; - - seq_write(seq, iter->buffer + iter->pos, len); - - return 0; -} diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 82babf5aa077..2d29e1d1225d 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "gcov.h" /** @@ -85,6 +86,121 @@ static int __init gcov_persist_setup(char *str) } __setup("gcov_persist=", gcov_persist_setup); +#define ITER_STRIDE PAGE_SIZE + +/** + * struct gcov_iterator - specifies current file position in logical records + * @info: associated profiling data + * @buffer: buffer containing file data + * @size: size of buffer + * @pos: current position in file + */ +struct gcov_iterator { + struct gcov_info *info; + void *buffer; + size_t size; + loff_t pos; +}; + +/** + * gcov_iter_new - allocate and initialize profiling data iterator + * @info: profiling data set to be iterated + * + * Return file iterator on success, %NULL otherwise. + */ +static struct gcov_iterator *gcov_iter_new(struct gcov_info *info) +{ + struct gcov_iterator *iter; + + iter = kzalloc(sizeof(struct gcov_iterator), GFP_KERNEL); + if (!iter) + goto err_free; + + iter->info = info; + /* Dry-run to get the actual buffer size. 
*/ + iter->size = convert_to_gcda(NULL, info); + iter->buffer = vmalloc(iter->size); + if (!iter->buffer) + goto err_free; + + convert_to_gcda(iter->buffer, info); + + return iter; + +err_free: + kfree(iter); + return NULL; +} + + +/** + * gcov_iter_free - free iterator data + * @iter: file iterator + */ +static void gcov_iter_free(struct gcov_iterator *iter) +{ + vfree(iter->buffer); + kfree(iter); +} + +/** + * gcov_iter_get_info - return profiling data set for given file iterator + * @iter: file iterator + */ +static struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter) +{ + return iter->info; +} + +/** + * gcov_iter_start - reset file iterator to starting position + * @iter: file iterator + */ +static void gcov_iter_start(struct gcov_iterator *iter) +{ + iter->pos = 0; +} + +/** + * gcov_iter_next - advance file iterator to next logical record + * @iter: file iterator + * + * Return zero if new position is valid, non-zero if iterator has reached end. + */ +static int gcov_iter_next(struct gcov_iterator *iter) +{ + if (iter->pos < iter->size) + iter->pos += ITER_STRIDE; + + if (iter->pos >= iter->size) + return -EINVAL; + + return 0; +} + +/** + * gcov_iter_write - write data for current pos to seq_file + * @iter: file iterator + * @seq: seq_file handle + * + * Return zero on success, non-zero otherwise. + */ +static int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq) +{ + size_t len; + + if (iter->pos >= iter->size) + return -EINVAL; + + len = ITER_STRIDE; + if (iter->pos + len > iter->size) + len = iter->size - iter->pos; + + seq_write(seq, iter->buffer + iter->pos, len); + + return 0; +} + /* * seq_file.start() implementation for gcov data files. 
Note that the * gcov_iterator interface is designed to be more restrictive than seq_file diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index c53408a00d0b..1251f2434e90 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include "gcov.h" @@ -363,71 +362,6 @@ free_info: kfree(info); } -#define ITER_STRIDE PAGE_SIZE - -/** - * struct gcov_iterator - specifies current file position in logical records - * @info: associated profiling data - * @buffer: buffer containing file data - * @size: size of buffer - * @pos: current position in file - */ -struct gcov_iterator { - struct gcov_info *info; - void *buffer; - size_t size; - loff_t pos; -}; - -/** - * store_gcov_u32 - store 32 bit number in gcov format to buffer - * @buffer: target buffer or NULL - * @off: offset into the buffer - * @v: value to be stored - * - * Number format defined by gcc: numbers are recorded in the 32 bit - * unsigned binary form of the endianness of the machine generating the - * file. Returns the number of bytes stored. If @buffer is %NULL, doesn't - * store anything. - */ -static size_t store_gcov_u32(void *buffer, size_t off, u32 v) -{ - u32 *data; - - if (buffer) { - data = buffer + off; - *data = v; - } - - return sizeof(*data); -} - -/** - * store_gcov_u64 - store 64 bit number in gcov format to buffer - * @buffer: target buffer or NULL - * @off: offset into the buffer - * @v: value to be stored - * - * Number format defined by gcc: numbers are recorded in the 32 bit - * unsigned binary form of the endianness of the machine generating the - * file. 64 bit numbers are stored as two 32 bit numbers, the low part - * first. Returns the number of bytes stored. If @buffer is %NULL, doesn't store - * anything. 
- */ -static size_t store_gcov_u64(void *buffer, size_t off, u64 v) -{ - u32 *data; - - if (buffer) { - data = buffer + off; - - data[0] = (v & 0xffffffffUL); - data[1] = (v >> 32); - } - - return sizeof(*data) * 2; -} - /** * convert_to_gcda - convert profiling data set to gcda file format * @buffer: the buffer to store file data or %NULL if no data should be stored @@ -435,7 +369,7 @@ static size_t store_gcov_u64(void *buffer, size_t off, u64 v) * * Returns the number of bytes that were/would have been stored into the buffer. */ -static size_t convert_to_gcda(char *buffer, struct gcov_info *info) +size_t convert_to_gcda(char *buffer, struct gcov_info *info) { struct gcov_fn_info *fi_ptr; struct gcov_ctr_info *ci_ptr; @@ -481,102 +415,3 @@ static size_t convert_to_gcda(char *buffer, struct gcov_info *info) return pos; } - -/** - * gcov_iter_new - allocate and initialize profiling data iterator - * @info: profiling data set to be iterated - * - * Return file iterator on success, %NULL otherwise. - */ -struct gcov_iterator *gcov_iter_new(struct gcov_info *info) -{ - struct gcov_iterator *iter; - - iter = kzalloc(sizeof(struct gcov_iterator), GFP_KERNEL); - if (!iter) - goto err_free; - - iter->info = info; - /* Dry-run to get the actual buffer size. 
*/ - iter->size = convert_to_gcda(NULL, info); - iter->buffer = vmalloc(iter->size); - if (!iter->buffer) - goto err_free; - - convert_to_gcda(iter->buffer, info); - - return iter; - -err_free: - kfree(iter); - return NULL; -} - - -/** - * gcov_iter_get_info - return profiling data set for given file iterator - * @iter: file iterator - */ -void gcov_iter_free(struct gcov_iterator *iter) -{ - vfree(iter->buffer); - kfree(iter); -} - -/** - * gcov_iter_get_info - return profiling data set for given file iterator - * @iter: file iterator - */ -struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter) -{ - return iter->info; -} - -/** - * gcov_iter_start - reset file iterator to starting position - * @iter: file iterator - */ -void gcov_iter_start(struct gcov_iterator *iter) -{ - iter->pos = 0; -} - -/** - * gcov_iter_next - advance file iterator to next logical record - * @iter: file iterator - * - * Return zero if new position is valid, non-zero if iterator has reached end. - */ -int gcov_iter_next(struct gcov_iterator *iter) -{ - if (iter->pos < iter->size) - iter->pos += ITER_STRIDE; - - if (iter->pos >= iter->size) - return -EINVAL; - - return 0; -} - -/** - * gcov_iter_write - write data for current pos to seq_file - * @iter: file iterator - * @seq: seq_file handle - * - * Return zero on success, non-zero otherwise. 
- */ -int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq) -{ - size_t len; - - if (iter->pos >= iter->size) - return -EINVAL; - - len = ITER_STRIDE; - if (iter->pos + len > iter->size) - len = iter->size - iter->pos; - - seq_write(seq, iter->buffer + iter->pos, len); - - return 0; -} diff --git a/kernel/gcov/gcov.h b/kernel/gcov/gcov.h index 6ab2c1808c9d..912b8ea01d33 100644 --- a/kernel/gcov/gcov.h +++ b/kernel/gcov/gcov.h @@ -48,6 +48,7 @@ struct gcov_info *gcov_info_next(struct gcov_info *info); void gcov_info_link(struct gcov_info *info); void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info); bool gcov_info_within_module(struct gcov_info *info, struct module *mod); +size_t convert_to_gcda(char *buffer, struct gcov_info *info); /* Base interface. */ enum gcov_action { @@ -58,16 +59,9 @@ enum gcov_action { void gcov_event(enum gcov_action action, struct gcov_info *info); void gcov_enable_events(void); -/* Iterator control. */ -struct seq_file; -struct gcov_iterator; - -struct gcov_iterator *gcov_iter_new(struct gcov_info *info); -void gcov_iter_free(struct gcov_iterator *iter); -void gcov_iter_start(struct gcov_iterator *iter); -int gcov_iter_next(struct gcov_iterator *iter); -int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq); -struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter); +/* writing helpers */ +size_t store_gcov_u32(void *buffer, size_t off, u32 v); +size_t store_gcov_u64(void *buffer, size_t off, u64 v); /* gcov_info control. */ void gcov_info_reset(struct gcov_info *info); -- cgit v1.2.3 From 3180c44fe1baf14fc876a4cdad77ea7b51ddc387 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 6 May 2021 18:04:48 -0700 Subject: gcov: simplify buffer allocation Use just a single vmalloc() with struct_size() instead of a separate kmalloc() for the iter struct. 
Link: https://lkml.kernel.org/r/20210315235453.b6de4a92096e.Iac40a5166589cefbff8449e466bd1b38ea7a17af@changeid Signed-off-by: Johannes Berg Cc: Peter Oberparleiter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/gcov/fs.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 2d29e1d1225d..40ea81c0475b 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -97,9 +97,9 @@ __setup("gcov_persist=", gcov_persist_setup); */ struct gcov_iterator { struct gcov_info *info; - void *buffer; size_t size; loff_t pos; + char buffer[]; }; /** @@ -111,25 +111,20 @@ struct gcov_iterator { static struct gcov_iterator *gcov_iter_new(struct gcov_info *info) { struct gcov_iterator *iter; + size_t size; + + /* Dry-run to get the actual buffer size. */ + size = convert_to_gcda(NULL, info); - iter = kzalloc(sizeof(struct gcov_iterator), GFP_KERNEL); + iter = vmalloc(struct_size(iter, buffer, size)); if (!iter) - goto err_free; + return NULL; iter->info = info; - /* Dry-run to get the actual buffer size. */ - iter->size = convert_to_gcda(NULL, info); - iter->buffer = vmalloc(iter->size); - if (!iter->buffer) - goto err_free; - + iter->size = size; convert_to_gcda(iter->buffer, info); return iter; - -err_free: - kfree(iter); - return NULL; } @@ -139,8 +134,7 @@ err_free: */ static void gcov_iter_free(struct gcov_iterator *iter) { - vfree(iter->buffer); - kfree(iter); + vfree(iter); } /** -- cgit v1.2.3 From 1391efa952e8b22088f8626fc63ade26767b92d6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 6 May 2021 18:04:51 -0700 Subject: gcov: use kvmalloc() Using vmalloc() in gcov is really quite wasteful, many of the objects allocated are really small (e.g. I've seen 24 bytes.) Use kvmalloc() to automatically pick the better of kmalloc() or vmalloc() depending on the size. 
[johannes.berg@intel.com: fix clang-11+ build] Link: https://lkml.kernel.org/r/20210412214210.6e1ecca9cdc5.I24459763acf0591d5e6b31c7e3a59890d802f79c@changeid Link: https://lkml.kernel.org/r/20210315235453.799e7a9d627d.I741d0db096c6f312910f7f1bcdfde0fda20801a4@changeid Signed-off-by: Johannes Berg Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Cc: Peter Oberparleiter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/gcov/clang.c | 12 ++++++------ kernel/gcov/fs.c | 6 +++--- kernel/gcov/gcc_4_7.c | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index 1741c65b4fb2..d43ffd0c5ddb 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include "gcov.h" typedef void (*llvm_gcov_callback)(void); @@ -333,8 +333,8 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src) static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) { size_t cv_size; /* counter values size */ - struct gcov_fn_info *fn_dup = kmemdup(fn, sizeof(*fn), - GFP_KERNEL); + struct gcov_fn_info *fn_dup = kmemdup(fn, sizeof(*fn), GFP_KERNEL); + if (!fn_dup) return NULL; INIT_LIST_HEAD(&fn_dup->head); @@ -344,7 +344,7 @@ static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) goto err_name; cv_size = fn->num_counters * sizeof(fn->counters[0]); - fn_dup->counters = vmalloc(cv_size); + fn_dup->counters = kvmalloc(cv_size, GFP_KERNEL); if (!fn_dup->counters) goto err_counters; memcpy(fn_dup->counters, fn->counters, cv_size); @@ -368,7 +368,7 @@ static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) INIT_LIST_HEAD(&fn_dup->head); cv_size = fn->num_counters * sizeof(fn->counters[0]); - fn_dup->counters = vmalloc(cv_size); + fn_dup->counters = kvmalloc(cv_size, GFP_KERNEL); if (!fn_dup->counters) { kfree(fn_dup); return NULL; @@ -439,7 +439,7 @@ void gcov_info_free(struct 
gcov_info *info) struct gcov_fn_info *fn, *tmp; list_for_each_entry_safe(fn, tmp, &info->functions, head) { - vfree(fn->counters); + kvfree(fn->counters); list_del(&fn->head); kfree(fn); } diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 40ea81c0475b..5c3086cad8f9 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include "gcov.h" /** @@ -116,7 +116,7 @@ static struct gcov_iterator *gcov_iter_new(struct gcov_info *info) /* Dry-run to get the actual buffer size. */ size = convert_to_gcda(NULL, info); - iter = vmalloc(struct_size(iter, buffer, size)); + iter = kvmalloc(struct_size(iter, buffer, size), GFP_KERNEL); if (!iter) return NULL; @@ -134,7 +134,7 @@ static struct gcov_iterator *gcov_iter_new(struct gcov_info *info) */ static void gcov_iter_free(struct gcov_iterator *iter) { - vfree(iter); + kvfree(iter); } /** diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index 1251f2434e90..460c12b7dfea 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "gcov.h" #if (__GNUC__ >= 10) @@ -309,7 +309,7 @@ struct gcov_info *gcov_info_dup(struct gcov_info *info) cv_size = sizeof(gcov_type) * sci_ptr->num; - dci_ptr->values = vmalloc(cv_size); + dci_ptr->values = kvmalloc(cv_size, GFP_KERNEL); if (!dci_ptr->values) goto err_free; @@ -351,7 +351,7 @@ void gcov_info_free(struct gcov_info *info) ci_ptr = info->functions[fi_idx]->ctrs; for (ct_idx = 0; ct_idx < active; ct_idx++, ci_ptr++) - vfree(ci_ptr->values); + kvfree(ci_ptr->values); kfree(info->functions[fi_idx]); } -- cgit v1.2.3 From 9b472e85d098a40b84dd8b33fbf8a15ab1452025 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 6 May 2021 18:04:54 -0700 Subject: gcov: clang: drop support for clang-10 and older LLVM changed the expected function signatures for llvm_gcda_start_file() and llvm_gcda_emit_function() in the clang-11 release. 
Drop the older implementations and require folks to upgrade their compiler if they're interested in GCOV support. Link: https://reviews.llvm.org/rGcdd683b516d147925212724b09ec6fb792a40041 Link: https://reviews.llvm.org/rG13a633b438b6500ecad9e4f936ebadf3411d0f44 Link: https://lkml.kernel.org/r/20210312224132.3413602-3-ndesaulniers@google.com Link: https://lkml.kernel.org/r/20210413183113.2977432-1-ndesaulniers@google.com Signed-off-by: Nick Desaulniers Suggested-by: Nathan Chancellor Acked-by: Peter Oberparleiter Reviewed-by: Nathan Chancellor Reviewed-by: Fangrui Song Cc: Prasad Sodagudi Cc: Johannes Berg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/gcov/Kconfig | 1 + kernel/gcov/clang.c | 103 ---------------------------------------------------- 2 files changed, 1 insertion(+), 103 deletions(-) (limited to 'kernel') diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index f62de2dea8a3..58f87a3092f3 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -4,6 +4,7 @@ menu "GCOV-based kernel profiling" config GCOV_KERNEL bool "Enable gcov-based kernel profiling" depends on DEBUG_FS + depends on !CC_IS_CLANG || CLANG_VERSION >= 110000 select CONSTRUCTORS default n help diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index d43ffd0c5ddb..cbb0bed958ab 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -69,16 +69,10 @@ struct gcov_fn_info { u32 ident; u32 checksum; -#if CONFIG_CLANG_VERSION < 110000 - u8 use_extra_checksum; -#endif u32 cfg_checksum; u32 num_counters; u64 *counters; -#if CONFIG_CLANG_VERSION < 110000 - const char *function_name; -#endif }; static struct gcov_info *current_info; @@ -108,16 +102,6 @@ void llvm_gcov_init(llvm_gcov_callback writeout, llvm_gcov_callback flush) } EXPORT_SYMBOL(llvm_gcov_init); -#if CONFIG_CLANG_VERSION < 110000 -void llvm_gcda_start_file(const char *orig_filename, const char version[4], - u32 checksum) -{ - current_info->filename = orig_filename; - memcpy(&current_info->version,
version, sizeof(current_info->version)); - current_info->checksum = checksum; -} -EXPORT_SYMBOL(llvm_gcda_start_file); -#else void llvm_gcda_start_file(const char *orig_filename, u32 version, u32 checksum) { current_info->filename = orig_filename; @@ -125,28 +109,7 @@ void llvm_gcda_start_file(const char *orig_filename, u32 version, u32 checksum) current_info->checksum = checksum; } EXPORT_SYMBOL(llvm_gcda_start_file); -#endif -#if CONFIG_CLANG_VERSION < 110000 -void llvm_gcda_emit_function(u32 ident, const char *function_name, - u32 func_checksum, u8 use_extra_checksum, u32 cfg_checksum) -{ - struct gcov_fn_info *info = kzalloc(sizeof(*info), GFP_KERNEL); - - if (!info) - return; - - INIT_LIST_HEAD(&info->head); - info->ident = ident; - info->checksum = func_checksum; - info->use_extra_checksum = use_extra_checksum; - info->cfg_checksum = cfg_checksum; - if (function_name) - info->function_name = kstrdup(function_name, GFP_KERNEL); - - list_add_tail(&info->head, &current_info->functions); -} -#else void llvm_gcda_emit_function(u32 ident, u32 func_checksum, u32 cfg_checksum) { struct gcov_fn_info *info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -160,7 +123,6 @@ void llvm_gcda_emit_function(u32 ident, u32 func_checksum, u32 cfg_checksum) info->cfg_checksum = cfg_checksum; list_add_tail(&info->head, &current_info->functions); } -#endif EXPORT_SYMBOL(llvm_gcda_emit_function); void llvm_gcda_emit_arcs(u32 num_counters, u64 *counters) @@ -291,16 +253,8 @@ int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2) !list_is_last(&fn_ptr2->head, &info2->functions)) { if (fn_ptr1->checksum != fn_ptr2->checksum) return false; -#if CONFIG_CLANG_VERSION < 110000 - if (fn_ptr1->use_extra_checksum != fn_ptr2->use_extra_checksum) - return false; - if (fn_ptr1->use_extra_checksum && - fn_ptr1->cfg_checksum != fn_ptr2->cfg_checksum) - return false; -#else if (fn_ptr1->cfg_checksum != fn_ptr2->cfg_checksum) return false; -#endif fn_ptr1 = list_next_entry(fn_ptr1, head); fn_ptr2
= list_next_entry(fn_ptr2, head); } @@ -329,35 +283,6 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src) } } -#if CONFIG_CLANG_VERSION < 110000 -static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) -{ - size_t cv_size; /* counter values size */ - struct gcov_fn_info *fn_dup = kmemdup(fn, sizeof(*fn), GFP_KERNEL); - - if (!fn_dup) - return NULL; - INIT_LIST_HEAD(&fn_dup->head); - - fn_dup->function_name = kstrdup(fn->function_name, GFP_KERNEL); - if (!fn_dup->function_name) - goto err_name; - - cv_size = fn->num_counters * sizeof(fn->counters[0]); - fn_dup->counters = kvmalloc(cv_size, GFP_KERNEL); - if (!fn_dup->counters) - goto err_counters; - memcpy(fn_dup->counters, fn->counters, cv_size); - - return fn_dup; - -err_counters: - kfree(fn_dup->function_name); -err_name: - kfree(fn_dup); - return NULL; -} -#else static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) { size_t cv_size; /* counter values size */ @@ -378,7 +303,6 @@ static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) return fn_dup; } -#endif /** * gcov_info_dup - duplicate profiling data set @@ -419,21 +343,6 @@ err: * gcov_info_free - release memory for profiling data set duplicate * @info: profiling data set duplicate to free */ -#if CONFIG_CLANG_VERSION < 110000 -void gcov_info_free(struct gcov_info *info) -{ - struct gcov_fn_info *fn, *tmp; - - list_for_each_entry_safe(fn, tmp, &info->functions, head) { - kfree(fn->function_name); - vfree(fn->counters); - list_del(&fn->head); - kfree(fn); - } - kfree(info->filename); - kfree(info); -} -#else void gcov_info_free(struct gcov_info *info) { struct gcov_fn_info *fn, *tmp; @@ -446,7 +355,6 @@ void gcov_info_free(struct gcov_info *info) kfree(info->filename); kfree(info); } -#endif /** * convert_to_gcda - convert profiling data set to gcda file format @@ -469,21 +377,10 @@ size_t convert_to_gcda(char *buffer, struct gcov_info *info) u32 i; pos += store_gcov_u32(buffer, pos, 
GCOV_TAG_FUNCTION); -#if CONFIG_CLANG_VERSION < 110000 - pos += store_gcov_u32(buffer, pos, - fi_ptr->use_extra_checksum ? 3 : 2); -#else pos += store_gcov_u32(buffer, pos, 3); -#endif pos += store_gcov_u32(buffer, pos, fi_ptr->ident); pos += store_gcov_u32(buffer, pos, fi_ptr->checksum); -#if CONFIG_CLANG_VERSION < 110000 - if (fi_ptr->use_extra_checksum) - pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum); -#else pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum); -#endif - pos += store_gcov_u32(buffer, pos, GCOV_TAG_COUNTER_BASE); pos += store_gcov_u32(buffer, pos, fi_ptr->num_counters * 2); for (i = 0; i < fi_ptr->num_counters; i++) -- cgit v1.2.3 From 97f61c8f44ec9020708b97a51188170add4f3084 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 6 May 2021 18:05:12 -0700 Subject: kernel/resource: make walk_system_ram_res() find all busy IORESOURCE_SYSTEM_RAM resources Patch series "kernel/resource: make walk_system_ram_res() and walk_mem_res() search the whole tree", v2. Playing with kdump+virtio-mem I noticed that kexec_file_load() does not consider System RAM added via dax/kmem and virtio-mem when preparing the elf header for kdump. Looking into the details, the logic used in walk_system_ram_res() and walk_mem_res() seems to be outdated. walk_system_ram_range() already does the right thing, let's change walk_system_ram_res() and walk_mem_res(), and clean up. Loading a kdump kernel via "kexec -p -s" ... will result in the kdump kernel to also dump dax/kmem and virtio-mem added System RAM now. Note: kexec-tools on x86-64 also have to be updated to consider this memory in the kexec_load() case when processing /proc/iomem. This patch (of 3): It used to be true that we can have system RAM (IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY) only on the first level in the resource tree. 
However, this no longer holds for driver-managed system RAM (i.e., added via dax/kmem and virtio-mem), which gets added on lower levels, for example, inside device containers. We have two users of walk_system_ram_res(), which currently only considers the first level: a) kernel/kexec_file.c:kexec_walk_resources() -- We properly skip IORESOURCE_SYSRAM_DRIVER_MANAGED resources via locate_mem_hole_callback(), so even after this change, we won't be placing kexec images onto dax/kmem and virtio-mem added memory. No change. b) arch/x86/kernel/crash.c:fill_up_crash_elf_data() -- we're currently not adding relevant ranges to the crash elf header, resulting in them not getting dumped via kdump. This change fixes loading a crashkernel via kexec_file_load() and including dax/kmem and virtio-mem added System RAM in the crashdump on x86-64. Note that, e.g., arm64 relies on memblock data and, therefore, always considers all added System RAM already. Let's find all IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY resources, making the function behave like walk_system_ram_range(). Link: https://lkml.kernel.org/r/20210325115326.7826-1-david@redhat.com Link: https://lkml.kernel.org/r/20210325115326.7826-2-david@redhat.com Fixes: ebf71552bb0e ("virtio-mem: Add parent resource for all added "System RAM"") Fixes: c221c0b0308f ("device-dax: "Hotplug" persistent memory for use like normal RAM") Signed-off-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Baoquan He Cc: Greg Kroah-Hartman Cc: Dan Williams Cc: Daniel Vetter Cc: Andy Shevchenko Cc: Mauro Carvalho Chehab Cc: Dave Young Cc: Baoquan He Cc: Vivek Goyal Cc: Dave Hansen Cc: Keith Busch Cc: Michal Hocko Cc: Qian Cai Cc: Oscar Salvador Cc: Eric Biederman Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Tom Lendacky Cc: Brijesh Singh Cc: "Eric W.
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 627e61b0c124..4efd6e912279 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -457,7 +457,7 @@ int walk_system_ram_res(u64 start, u64 end, void *arg, { unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, true, + return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, false, arg, func); } -- cgit v1.2.3 From 3c9c797534364593b73ba6ab060a014af8934721 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 6 May 2021 18:05:16 -0700 Subject: kernel/resource: make walk_mem_res() find all busy IORESOURCE_MEM resources It used to be true that we can have system RAM (IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY) only on the first level in the resource tree. However, this no longer holds for driver-managed system RAM (i.e., added via dax/kmem and virtio-mem), which gets added on lower levels, for example, inside device containers. IORESOURCE_SYSTEM_RAM is defined as IORESOURCE_MEM | IORESOURCE_SYSRAM and is just a special type of IORESOURCE_MEM. The function walk_mem_res() only considers the first level and is used in arch/x86/mm/ioremap.c:__ioremap_check_mem() only. We currently fail to identify System RAM added by dax/kmem and virtio-mem as "IORES_MAP_SYSTEM_RAM", for example, allowing for remapping of such "normal RAM" in __ioremap_caller(). Let's find all IORESOURCE_MEM | IORESOURCE_BUSY resources, making the function behave similarly to walk_system_ram_res().
Link: https://lkml.kernel.org/r/20210325115326.7826-3-david@redhat.com Fixes: ebf71552bb0e ("virtio-mem: Add parent resource for all added "System RAM"") Fixes: c221c0b0308f ("device-dax: "Hotplug" persistent memory for use like normal RAM") Signed-off-by: David Hildenbrand Reviewed-by: Dan Williams Cc: Greg Kroah-Hartman Cc: Dan Williams Cc: Daniel Vetter Cc: Andy Shevchenko Cc: Mauro Carvalho Chehab Cc: Dave Young Cc: Baoquan He Cc: Vivek Goyal Cc: Dave Hansen Cc: Keith Busch Cc: Michal Hocko Cc: Qian Cai Cc: Oscar Salvador Cc: Eric Biederman Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Tom Lendacky Cc: Brijesh Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 4efd6e912279..16e0c7e8ed24 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -470,7 +470,7 @@ int walk_mem_res(u64 start, u64 end, void *arg, { unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY; - return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, true, + return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, false, arg, func); } -- cgit v1.2.3 From 97523a4edb7b9dc2be48a24a2387fb1328b29521 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 6 May 2021 18:05:20 -0700 Subject: kernel/resource: remove first_lvl / siblings_only logic All functions that search for IORESOURCE_SYSTEM_RAM or IORESOURCE_MEM resources now properly consider the whole resource tree, not just the first level. Let's drop the unused first_lvl / siblings_only logic. Remove documentation that indicates that some functions behave differently, all consider the full resource tree now. 
Link: https://lkml.kernel.org/r/20210325115326.7826-4-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Dan Williams Reviewed-by: Andy Shevchenko Cc: Greg Kroah-Hartman Cc: Dan Williams Cc: Daniel Vetter Cc: Andy Shevchenko Cc: Mauro Carvalho Chehab Cc: Dave Young Cc: Baoquan He Cc: Vivek Goyal Cc: Dave Hansen Cc: Keith Busch Cc: Michal Hocko Cc: Qian Cai Cc: Oscar Salvador Cc: Eric Biederman Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Tom Lendacky Cc: Brijesh Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 45 ++++++++++++--------------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 16e0c7e8ed24..7e00239a023a 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -64,12 +64,8 @@ static DEFINE_RWLOCK(resource_lock); static struct resource *bootmem_resource_free; static DEFINE_SPINLOCK(bootmem_resource_lock); -static struct resource *next_resource(struct resource *p, bool sibling_only) +static struct resource *next_resource(struct resource *p) { - /* Caller wants to traverse through siblings only */ - if (sibling_only) - return p->sibling; - if (p->child) return p->child; while (!p->sibling && p->parent) @@ -81,7 +77,7 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos) { struct resource *p = v; (*pos)++; - return (void *)next_resource(p, false); + return (void *)next_resource(p); } #ifdef CONFIG_PROC_FS @@ -330,14 +326,10 @@ EXPORT_SYMBOL(release_resource); * of the resource that's within [@start..@end]; if none is found, returns * -ENODEV. Returns -EINVAL for invalid parameters. * - * This function walks the whole tree and not just first level children - * unless @first_lvl is true. 
- * * @start: start address of the resource searched for * @end: end address of same resource * @flags: flags which the resource must have * @desc: descriptor the resource must have - * @first_lvl: walk only the first level children, if set * @res: return ptr, if resource found * * The caller must specify @start, @end, @flags, and @desc @@ -345,9 +337,8 @@ EXPORT_SYMBOL(release_resource); */ static int find_next_iomem_res(resource_size_t start, resource_size_t end, unsigned long flags, unsigned long desc, - bool first_lvl, struct resource *res) + struct resource *res) { - bool siblings_only = true; struct resource *p; if (!res) @@ -358,7 +349,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end, read_lock(&resource_lock); - for (p = iomem_resource.child; p; p = next_resource(p, siblings_only)) { + for (p = iomem_resource.child; p; p = next_resource(p)) { /* If we passed the resource we are looking for, stop */ if (p->start > end) { p = NULL; @@ -369,13 +360,6 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end, if (p->end < start) continue; - /* - * Now that we found a range that matches what we look for, - * check the flags and the descriptor. If we were not asked to - * use only the first level, start looking at children as well. 
- */ - siblings_only = first_lvl; - if ((p->flags & flags) != flags) continue; if ((desc != IORES_DESC_NONE) && (desc != p->desc)) @@ -402,14 +386,14 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end, static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end, unsigned long flags, unsigned long desc, - bool first_lvl, void *arg, + void *arg, int (*func)(struct resource *, void *)) { struct resource res; int ret = -EINVAL; while (start < end && - !find_next_iomem_res(start, end, flags, desc, first_lvl, &res)) { + !find_next_iomem_res(start, end, flags, desc, &res)) { ret = (*func)(&res, arg); if (ret) break; @@ -431,7 +415,6 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end, * @arg: function argument for the callback @func * @func: callback function that is called for each qualifying resource area * - * This walks through whole tree and not just first level children. * All the memory ranges which overlap start,end and also match flags and * desc are valid candidates. 
* @@ -441,7 +424,7 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end, int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)) { - return __walk_iomem_res_desc(start, end, flags, desc, false, arg, func); + return __walk_iomem_res_desc(start, end, flags, desc, arg, func); } EXPORT_SYMBOL_GPL(walk_iomem_res_desc); @@ -457,8 +440,8 @@ int walk_system_ram_res(u64 start, u64 end, void *arg, { unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, false, - arg, func); + return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, arg, + func); } /* @@ -470,17 +453,14 @@ int walk_mem_res(u64 start, u64 end, void *arg, { unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY; - return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, false, - arg, func); + return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, arg, + func); } /* * This function calls the @func callback against all memory ranges of type * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY. * It is to be used only for System RAM. - * - * This will find System RAM ranges that are children of top-level resources - * in addition to top-level System RAM resources. 
*/ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) @@ -495,8 +475,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1; flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; while (start < end && - !find_next_iomem_res(start, end, flags, IORES_DESC_NONE, - false, &res)) { + !find_next_iomem_res(start, end, flags, IORES_DESC_NONE, &res)) { pfn = PFN_UP(res.start); end_pfn = PFN_DOWN(res.end + 1); if (end_pfn > pfn) -- cgit v1.2.3 From d486ccb2522fc22f04f191cac99a844f92d56a7e Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 6 May 2021 18:05:24 -0700 Subject: kernel/resource: allow region_intersects users to hold resource_lock Introduce a version of region_intersects() that can be called with the resource_lock already held. This will be used in a future fix to __request_free_mem_region(). [akpm@linux-foundation.org: make __region_intersects static] Link: https://lkml.kernel.org/r/20210419070109.4780-1-apopple@nvidia.com Signed-off-by: Alistair Popple Cc: David Hildenbrand Cc: Daniel Vetter Cc: Dan Williams Cc: Greg Kroah-Hartman Cc: John Hubbard Cc: Jerome Glisse Cc: Balbir Singh Cc: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 52 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 7e00239a023a..f4aeeda7ee28 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -502,6 +502,34 @@ int __weak page_is_ram(unsigned long pfn) } EXPORT_SYMBOL_GPL(page_is_ram); +static int __region_intersects(resource_size_t start, size_t size, + unsigned long flags, unsigned long desc) +{ + struct resource res; + int type = 0; int other = 0; + struct resource *p; + + res.start = start; + res.end = start + size - 1; + + for (p = 
iomem_resource.child; p ; p = p->sibling) { + bool is_type = (((p->flags & flags) == flags) && + ((desc == IORES_DESC_NONE) || + (desc == p->desc))); + + if (resource_overlaps(p, &res)) + is_type ? type++ : other++; + } + + if (type == 0) + return REGION_DISJOINT; + + if (other == 0) + return REGION_INTERSECTS; + + return REGION_MIXED; +} + /** * region_intersects() - determine intersection of region with known resources * @start: region start address @@ -525,31 +553,13 @@ EXPORT_SYMBOL_GPL(page_is_ram); int region_intersects(resource_size_t start, size_t size, unsigned long flags, unsigned long desc) { - struct resource res; - int type = 0; int other = 0; - struct resource *p; - - res.start = start; - res.end = start + size - 1; + int ret; read_lock(&resource_lock); - for (p = iomem_resource.child; p ; p = p->sibling) { - bool is_type = (((p->flags & flags) == flags) && - ((desc == IORES_DESC_NONE) || - (desc == p->desc))); - - if (resource_overlaps(p, &res)) - is_type ? type++ : other++; - } + ret = __region_intersects(start, size, flags, desc); read_unlock(&resource_lock); - if (type == 0) - return REGION_DISJOINT; - - if (other == 0) - return REGION_INTERSECTS; - - return REGION_MIXED; + return ret; } EXPORT_SYMBOL_GPL(region_intersects); -- cgit v1.2.3 From 63cdafe0af982e7da9ded37ccf21109a02bc6832 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 6 May 2021 18:05:27 -0700 Subject: kernel/resource: refactor __request_region to allow external locking Refactor the portion of __request_region() done whilst holding the resource_lock into a separate function to allow callers to hold the lock. 
Link: https://lkml.kernel.org/r/20210419070109.4780-2-apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: David Hildenbrand Cc: Balbir Singh Cc: Daniel Vetter Cc: Dan Williams Cc: Greg Kroah-Hartman Cc: Jerome Glisse Cc: John Hubbard Cc: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 52 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index f4aeeda7ee28..c8e473b68f17 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1160,31 +1160,16 @@ struct address_space *iomem_get_mapping(void) return smp_load_acquire(&iomem_inode)->i_mapping; } -/** - * __request_region - create a new busy resource region - * @parent: parent resource descriptor - * @start: resource start address - * @n: resource region size - * @name: reserving caller's ID string - * @flags: IO resource flags - */ -struct resource * __request_region(struct resource *parent, +static int __request_region_locked(struct resource *res, struct resource *parent, resource_size_t start, resource_size_t n, const char *name, int flags) { DECLARE_WAITQUEUE(wait, current); - struct resource *res = alloc_resource(GFP_KERNEL); - struct resource *orig_parent = parent; - - if (!res) - return NULL; res->name = name; res->start = start; res->end = start + n - 1; - write_lock(&resource_lock); - for (;;) { struct resource *conflict; @@ -1220,13 +1205,40 @@ struct resource * __request_region(struct resource *parent, continue; } /* Uhhuh, that didn't work out.. 
*/ - free_resource(res); - res = NULL; - break; + return -EBUSY; } + + return 0; +} + +/** + * __request_region - create a new busy resource region + * @parent: parent resource descriptor + * @start: resource start address + * @n: resource region size + * @name: reserving caller's ID string + * @flags: IO resource flags + */ +struct resource *__request_region(struct resource *parent, + resource_size_t start, resource_size_t n, + const char *name, int flags) +{ + struct resource *res = alloc_resource(GFP_KERNEL); + int ret; + + if (!res) + return NULL; + + write_lock(&resource_lock); + ret = __request_region_locked(res, parent, start, n, name, flags); write_unlock(&resource_lock); - if (res && orig_parent == &iomem_resource) + if (ret) { + free_resource(res); + return NULL; + } + + if (parent == &iomem_resource) revoke_iomem(res); return res; -- cgit v1.2.3 From 56fd94919b8bfdbe162f78920b4ebc72b4ce2f39 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 6 May 2021 18:05:30 -0700 Subject: kernel/resource: fix locking in request_free_mem_region request_free_mem_region() is used to find an empty range of physical addresses for hotplugging ZONE_DEVICE memory. It does this by iterating over the range of possible addresses using region_intersects() to see if the range is free before calling request_mem_region() to allocate the region. However the resource_lock is dropped between these two calls meaning by the time request_mem_region() is called in request_free_mem_region() another thread may have already reserved the requested region. This results in unexpected failures and a message in the kernel log from hitting this condition: /* * mm/hmm.c reserves physical addresses which then * become unavailable to other users. Conflicts are * not expected. Warn to aid debugging if encountered. 
*/ if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) { pr_warn("Unaddressable device %s %pR conflicts with %pR", conflict->name, conflict, res); These unexpected failures can be corrected by holding resource_lock across the two calls. This also requires memory allocation to be performed prior to taking the lock. Link: https://lkml.kernel.org/r/20210419070109.4780-3-apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: David Hildenbrand Cc: Balbir Singh Cc: Daniel Vetter Cc: Dan Williams Cc: Greg Kroah-Hartman Cc: Jerome Glisse Cc: John Hubbard Cc: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index c8e473b68f17..028a5ab18818 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1780,25 +1780,56 @@ static struct resource *__request_free_mem_region(struct device *dev, { resource_size_t end, addr; struct resource *res; + struct region_devres *dr = NULL; size = ALIGN(size, 1UL << PA_SECTION_SHIFT); end = min_t(unsigned long, base->end, (1UL << MAX_PHYSMEM_BITS) - 1); addr = end - size + 1UL; + res = alloc_resource(GFP_KERNEL); + if (!res) + return ERR_PTR(-ENOMEM); + + if (dev) { + dr = devres_alloc(devm_region_release, + sizeof(struct region_devres), GFP_KERNEL); + if (!dr) { + free_resource(res); + return ERR_PTR(-ENOMEM); + } + } + + write_lock(&resource_lock); for (; addr > size && addr >= base->start; addr -= size) { - if (region_intersects(addr, size, 0, IORES_DESC_NONE) != + if (__region_intersects(addr, size, 0, IORES_DESC_NONE) != REGION_DISJOINT) continue; - if (dev) - res = devm_request_mem_region(dev, addr, size, name); - else - res = request_mem_region(addr, size, name); - if (!res) - return ERR_PTR(-ENOMEM); + if (!__request_region_locked(res, &iomem_resource, addr, size, + name, 0)) + break; + + if (dev) { + dr->parent 
= &iomem_resource; + dr->start = addr; + dr->n = size; + devres_add(dev, dr); + } + res->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY; + write_unlock(&resource_lock); + + /* + * A driver is claiming this region so revoke any mappings. + */ + revoke_iomem(res); return res; } + write_unlock(&resource_lock); + + free_resource(res); + if (dr) + devres_free(dr); return ERR_PTR(-ERANGE); } -- cgit v1.2.3 From 07416af11dd85ca61abe60155ace37ced1233617 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 6 May 2021 18:05:36 -0700 Subject: kernel/async.c: stop guarding pr_debug() statements It's currently nigh impossible to get these pr_debug()s to print something. Being guarded by initcall_debug means one has to enable tons of other debug output during boot, and the system_state condition further means it's impossible to get them when loading modules later. Also, the compiler can't know that these global conditions do not change, so there are W=2 warnings kernel/async.c:125:9: warning: `calltime' may be used uninitialized in this function [-Wmaybe-uninitialized] kernel/async.c:300:9: warning: `starttime' may be used uninitialized in this function [-Wmaybe-uninitialized] Make it possible, for a DYNAMIC_DEBUG kernel, to get these to print their messages by booting with appropriate 'dyndbg="file async.c +p"' command line argument. For a non-DYNAMIC_DEBUG kernel, pr_debug() compiles to nothing. This does cost doing an unconditional ktime_get() for the starttime value, but the corresponding ktime_get for the end time can be elided by factoring it into a function which only gets called if the printk() arguments end up being evaluated. 
Link: https://lkml.kernel.org/r/20210309151723.1907838-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/async.c | 48 ++++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/kernel/async.c b/kernel/async.c index 45a867b8644a..4b5971142922 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -78,6 +78,12 @@ static DECLARE_WAIT_QUEUE_HEAD(async_done); static atomic_t entry_count; +static long long microseconds_since(ktime_t start) +{ + ktime_t now = ktime_get(); + return ktime_to_ns(ktime_sub(now, start)) >> 10; +} + static async_cookie_t lowest_in_progress(struct async_domain *domain) { struct async_entry *first = NULL; @@ -111,24 +117,18 @@ static void async_run_entry_fn(struct work_struct *work) struct async_entry *entry = container_of(work, struct async_entry, work); unsigned long flags; - ktime_t calltime, delta, rettime; + ktime_t calltime; /* 1) run (and print duration) */ - if (initcall_debug && system_state < SYSTEM_RUNNING) { - pr_debug("calling %lli_%pS @ %i\n", - (long long)entry->cookie, - entry->func, task_pid_nr(current)); - calltime = ktime_get(); - } + pr_debug("calling %lli_%pS @ %i\n", (long long)entry->cookie, + entry->func, task_pid_nr(current)); + calltime = ktime_get(); + entry->func(entry->data, entry->cookie); - if (initcall_debug && system_state < SYSTEM_RUNNING) { - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - pr_debug("initcall %lli_%pS returned after %lld usecs\n", - (long long)entry->cookie, - entry->func, - (long long)ktime_to_ns(delta) >> 10); - } + + pr_debug("initcall %lli_%pS returned after %lld usecs\n", + (long long)entry->cookie, entry->func, + microseconds_since(calltime)); /* 2) remove self from the pending queues */ spin_lock_irqsave(&async_lock, flags); @@ -287,23 +287,15 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain); */ void 
async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain) { - ktime_t starttime, delta, endtime; + ktime_t starttime; - if (initcall_debug && system_state < SYSTEM_RUNNING) { - pr_debug("async_waiting @ %i\n", task_pid_nr(current)); - starttime = ktime_get(); - } + pr_debug("async_waiting @ %i\n", task_pid_nr(current)); + starttime = ktime_get(); wait_event(async_done, lowest_in_progress(domain) >= cookie); - if (initcall_debug && system_state < SYSTEM_RUNNING) { - endtime = ktime_get(); - delta = ktime_sub(endtime, starttime); - - pr_debug("async_continuing @ %i after %lli usec\n", - task_pid_nr(current), - (long long)ktime_to_ns(delta) >> 10); - } + pr_debug("async_continuing @ %i after %lli usec\n", task_pid_nr(current), + microseconds_since(starttime)); } EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain); -- cgit v1.2.3 From a065c0faacb1e472cd4e048986407d1b177373a2 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 6 May 2021 18:05:39 -0700 Subject: kernel/async.c: remove async_unregister_domain() No callers in the tree. 
Link: https://lkml.kernel.org/r/20210309151723.1907838-2-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/async.h | 1 - kernel/async.c | 18 ------------------ 2 files changed, 19 deletions(-) (limited to 'kernel') diff --git a/include/linux/async.h b/include/linux/async.h index 0a17cd27f348..cce4ad31e8fc 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -112,7 +112,6 @@ async_schedule_dev_domain(async_func_t func, struct device *dev, return async_schedule_node_domain(func, dev, dev_to_node(dev), domain); } -void async_unregister_domain(struct async_domain *domain); extern void async_synchronize_full(void); extern void async_synchronize_full_domain(struct async_domain *domain); extern void async_synchronize_cookie(async_cookie_t cookie); diff --git a/kernel/async.c b/kernel/async.c index 4b5971142922..b8d7a663497f 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -245,24 +245,6 @@ void async_synchronize_full(void) } EXPORT_SYMBOL_GPL(async_synchronize_full); -/** - * async_unregister_domain - ensure no more anonymous waiters on this domain - * @domain: idle domain to flush out of any async_synchronize_full instances - * - * async_synchronize_{cookie|full}_domain() are not flushed since callers - * of these routines should know the lifetime of @domain - * - * Prefer ASYNC_DOMAIN_EXCLUSIVE() declarations over flushing - */ -void async_unregister_domain(struct async_domain *domain) -{ - spin_lock_irq(&async_lock); - WARN_ON(!domain->registered || !list_empty(&domain->pending)); - domain->registered = 0; - spin_unlock_irq(&async_lock); -} -EXPORT_SYMBOL_GPL(async_unregister_domain); - /** * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain * @domain: the domain to synchronize -- cgit v1.2.3 From e7cb072eb988e46295512617c39d004f9e1c26f8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 6 May 2021 
18:05:42 -0700 Subject: init/initramfs.c: do unpacking asynchronously Patch series "background initramfs unpacking, and CONFIG_MODPROBE_PATH", v3. These two patches are independent, but better-together. The second is a rather trivial patch that simply allows the developer to change "/sbin/modprobe" to something else - e.g. the empty string, so that all request_module() during early boot return -ENOENT early, without even spawning a usermode helper, needlessly synchronizing with the initramfs unpacking. The first patch delegates decompressing the initramfs to a worker thread, allowing do_initcalls() in main.c to proceed to the device_ and late_ initcalls without waiting for that decompression (and populating of rootfs) to finish. Obviously, some of those later calls may rely on the initramfs being available, so I've added synchronization points in the firmware loader and usermodehelper paths - there might be other places that would need this, but so far no one has been able to think of any places I have missed. There's not much to win if most of the functionality needed during boot is only available as modules. But systems with a custom-made .config and initramfs can boot faster, partly due to utilizing more than one cpu earlier, partly by avoiding known-futile modprobe calls (which would still trigger synchronization with the initramfs unpacking, thus eliminating most of the first benefit). This patch (of 2): Most of the boot process doesn't actually need anything from the initramfs, until of course PID1 is to be executed. So instead of doing the decompressing and populating of the initramfs synchronously in populate_rootfs() itself, push that off to a worker thread. This is primarily motivated by an embedded ppc target, where unpacking even the rather modest sized initramfs takes 0.6 seconds, which is long enough that the external watchdog becomes unhappy that it doesn't get attention soon enough. 
By doing the initramfs decompression in a worker thread, we get to do the device_initcalls and hence start petting the watchdog much sooner. Normal desktops might benefit as well. On my mostly stock Ubuntu kernel, my initramfs is a 26M xz-compressed blob, decompressing to around 126M. That takes almost two seconds: [ 0.201454] Trying to unpack rootfs image as initramfs... [ 1.976633] Freeing initrd memory: 29416K Before this patch, these lines occur consecutively in dmesg. With this patch, the timestamps on these two lines are roughly the same as above, but with 172 lines in between - so more than one cpu has been kept busy doing work that would otherwise only happen after the populate_rootfs() finished. Should one of the initcalls done after rootfs_initcall time (i.e., device_ and late_ initcalls) need something from the initramfs (say, a kernel module or a firmware blob), it will simply wait for the initramfs unpacking to be done before proceeding, which should in theory make this completely safe. But if some driver pokes around in the filesystem directly and not via one of the official kernel interfaces (i.e. request_firmware*(), call_usermodehelper*) that theory may not hold - also, I certainly might have missed a spot when sprinkling wait_for_initramfs(). So there is an escape hatch in the form of an initramfs_async= command line parameter.
Link: https://lkml.kernel.org/r/20210313212528.2956377-1-linux@rasmusvillemoes.dk Link: https://lkml.kernel.org/r/20210313212528.2956377-2-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Reviewed-by: Luis Chamberlain Cc: Jessica Yu Cc: Borislav Petkov Cc: Jonathan Corbet Cc: Greg Kroah-Hartman Cc: Nick Desaulniers Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 12 ++++++++ drivers/base/firmware_loader/main.c | 2 ++ include/linux/initrd.h | 2 ++ init/initramfs.c | 38 ++++++++++++++++++++++++- init/main.c | 1 + kernel/umh.c | 2 ++ 6 files changed, 56 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d93fbc1c1917..7866cc1bd4a9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1833,6 +1833,18 @@ initcall functions. Useful for debugging built-in modules and initcalls. + initramfs_async= [KNL] + Format: + Default: 1 + This parameter controls whether the initramfs + image is unpacked asynchronously, concurrently + with devices being probed and + initialized. This should normally just work, + but as a debugging aid, one can get the + historical behaviour of the initramfs + unpacking being completed before device_ and + late_ initcalls. 
+ initrd= [BOOT] Specify the location of the initial ramdisk initrdmem= [KNL] Specify a physical address and size from which to diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 78355095e00d..4fdb8219cd08 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -504,6 +505,7 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv, if (!path) return -ENOMEM; + wait_for_initramfs(); for (i = 0; i < ARRAY_SIZE(fw_path); i++) { size_t file_size = 0; size_t *file_size_ptr = NULL; diff --git a/include/linux/initrd.h b/include/linux/initrd.h index 85c15717af34..1bbe9af48dc3 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -20,8 +20,10 @@ extern void free_initrd_mem(unsigned long, unsigned long); #ifdef CONFIG_BLK_DEV_INITRD extern void __init reserve_initrd_mem(void); +extern void wait_for_initramfs(void); #else static inline void __init reserve_initrd_mem(void) {} +static inline void wait_for_initramfs(void) {} #endif extern phys_addr_t phys_initrd_start; diff --git a/init/initramfs.c b/init/initramfs.c index d677e8e717f1..af27abc59643 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -541,6 +542,14 @@ static int __init keepinitrd_setup(char *__unused) __setup("keepinitrd", keepinitrd_setup); #endif +static bool __initdata initramfs_async = true; +static int __init initramfs_async_setup(char *str) +{ + strtobool(str, &initramfs_async); + return 1; +} +__setup("initramfs_async=", initramfs_async_setup); + extern char __initramfs_start[]; extern unsigned long __initramfs_size; #include @@ -658,7 +667,7 @@ static void __init populate_initrd_image(char *err) } #endif /* CONFIG_BLK_DEV_RAM */ -static int __init populate_rootfs(void) +static void __init do_populate_rootfs(void 
*unused, async_cookie_t cookie) { /* Load the built in initramfs */ char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size); @@ -693,6 +702,33 @@ done: initrd_end = 0; flush_delayed_fput(); +} + +static ASYNC_DOMAIN_EXCLUSIVE(initramfs_domain); +static async_cookie_t initramfs_cookie; + +void wait_for_initramfs(void) +{ + if (!initramfs_cookie) { + /* + * Something before rootfs_initcall wants to access + * the filesystem/initramfs. Probably a bug. Make a + * note, avoid deadlocking the machine, and let the + * caller's access fail as it used to. + */ + pr_warn_once("wait_for_initramfs() called before rootfs_initcalls\n"); + return; + } + async_synchronize_cookie_domain(initramfs_cookie + 1, &initramfs_domain); +} +EXPORT_SYMBOL_GPL(wait_for_initramfs); + +static int __init populate_rootfs(void) +{ + initramfs_cookie = async_schedule_domain(do_populate_rootfs, NULL, + &initramfs_domain); + if (!initramfs_async) + wait_for_initramfs(); return 0; } rootfs_initcall(populate_rootfs); diff --git a/init/main.c b/init/main.c index dd11bfd10ead..11d34ccf5786 100644 --- a/init/main.c +++ b/init/main.c @@ -1561,6 +1561,7 @@ static noinline void __init kernel_init_freeable(void) kunit_run_all_tests(); + wait_for_initramfs(); console_on_rootfs(); /* diff --git a/kernel/umh.c b/kernel/umh.c index 3f646613a9d3..61f6b82c354b 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -107,6 +108,7 @@ static int call_usermodehelper_exec_async(void *data) commit_creds(new); + wait_for_initramfs(); retval = kernel_execve(sub_info->path, (const char *const *)sub_info->argv, (const char *const *)sub_info->envp); -- cgit v1.2.3 From 17652f4240f7a501ecc13e9fdb06982569cde51f Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 6 May 2021 18:05:45 -0700 Subject: modules: add CONFIG_MODPROBE_PATH Allow the developer to specify the initial value of the modprobe_path[] string.
This can be used to set it to the empty string initially, thus effectively disabling request_module() during early boot until userspace writes a new value via the /proc/sys/kernel/modprobe interface. [1] When building a custom kernel (often for an embedded target), it's normal to build everything into the kernel that is needed for booting, and indeed the initramfs often contains no modules at all, so every such request_module() done before userspace init has mounted the real rootfs is a waste of time. This is particularly useful when combined with the previous patch, which made the initramfs unpacking asynchronous - for that to work, it had to make any usermodehelper call wait for the unpacking to finish before attempting to invoke the userspace helper. By eliminating all such (known-to-be-futile) calls of usermodehelper, the initramfs unpacking and the {device,late}_initcalls can proceed in parallel for much longer. For a relatively slow ppc board I'm working on, the two patches combined lead to 0.2s faster boot - but more importantly, the fact that the initramfs unpacking proceeds completely in the background while devices get probed means I get to handle the gpio watchdog in time without getting reset. [1] __request_module() already has an early -ENOENT return when modprobe_path is the empty string. Link: https://lkml.kernel.org/r/20210313212528.2956377-3-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Reviewed-by: Greg Kroah-Hartman Acked-by: Jessica Yu Acked-by: Luis Chamberlain Cc: Borislav Petkov Cc: Jonathan Corbet Cc: Nick Desaulniers Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 12 ++++++++++++ kernel/kmod.c | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/init/Kconfig b/init/Kconfig index 1413413fcb9f..d83cb634c24f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -2299,6 +2299,18 @@ config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS If unsure, say N. 
+config MODPROBE_PATH + string "Path to modprobe binary" + default "/sbin/modprobe" + help + When kernel code requests a module, it does so by calling + the "modprobe" userspace utility. This option allows you to + set the path where that binary is found. This can be changed + at runtime via the sysctl file + /proc/sys/kernel/modprobe. Setting this to the empty string + removes the kernel's ability to request modules (but + userspace can still load modules explicitly). + config TRIM_UNUSED_KSYMS bool "Trim unused exported kernel symbols" if EXPERT depends on !COMPILE_TEST diff --git a/kernel/kmod.c b/kernel/kmod.c index 3cd075ce2a1e..b717134ebe17 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -58,7 +58,7 @@ static DECLARE_WAIT_QUEUE_HEAD(kmod_wq); /* modprobe_path is set via /proc/sys. */ -char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe"; +char modprobe_path[KMOD_PATH_LEN] = CONFIG_MODPROBE_PATH; static void free_modprobe_argv(struct subprocess_info *info) { -- cgit v1.2.3 From bbcd53c960713507ae764bf81970651b5577b95a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 6 May 2021 18:05:55 -0700 Subject: drivers/char: remove /dev/kmem for good Patch series "drivers/char: remove /dev/kmem for good". Exploring /dev/kmem and /dev/mem in the context of memory hot(un)plug and memory ballooning, I started questioning the existence of /dev/kmem. Comparing it with the /proc/kcore implementation, it does not seem to be able to deal with things like a) Pages unmapped from the direct mapping (e.g., to be used by secretmem) -> kern_addr_valid(). virt_addr_valid() is not sufficient. b) Special cases like gart aperture memory that is not to be touched -> mem_pfn_is_ram() Unless I am missing something, it's at least broken in some cases and might fault/crash the machine. Looks like its existence has been questioned before in 2005 and 2010 [1], after ~11 additional years, it might make sense to revive the discussion. 
CONFIG_DEVKMEM is only enabled in a single defconfig (on purpose or by mistake?). All distributions disable it: in Ubuntu it has been disabled for more than 10 years, in Debian since 2.6.31, in Fedora at least starting with FC3, in RHEL starting with RHEL4, in SUSE starting from 15sp2, and OpenSUSE has it disabled as well. 1) /dev/kmem was popular for rootkits [2] before it got disabled basically everywhere. Ubuntu documents [3] "There is no modern user of /dev/kmem any more beyond attackers using it to load kernel rootkits.". RHEL documents in a BZ [5] "it served no practical purpose other than to serve as a potential security problem or to enable binary module drivers to access structures/functions they shouldn't be touching" 2) /proc/kcore is a decent interface to have a controlled way to read kernel memory for debugging purposes. (will need some extensions to deal with memory offlining/unplug, memory ballooning, and poisoned pages, though) 3) It might be useful for corner case debugging [1]. KDB/KGDB might be a better fit, especially, to write random memory; harder to shoot yourself into the foot. 4) "Kernel Memory Editor" [4] hasn't seen any updates since 2000 and seems to be incompatible with 64bit [1]. For educational purposes, /proc/kcore might be used to monitor value updates -- or older kernels can be used. 5) It's broken on arm64, and therefore, completely disabled there. Looks like it's essentially unused and has been replaced by better suited interfaces for individual tasks (/proc/kcore, KDB/KGDB). Let's just remove it.
[1] https://lwn.net/Articles/147901/ [2] https://www.linuxjournal.com/article/10505 [3] https://wiki.ubuntu.com/Security/Features#A.2Fdev.2Fkmem_disabled [4] https://sourceforge.net/projects/kme/ [5] https://bugzilla.redhat.com/show_bug.cgi?id=154796 Link: https://lkml.kernel.org/r/20210324102351.6932-1-david@redhat.com Link: https://lkml.kernel.org/r/20210324102351.6932-2-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Kees Cook Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: "Alexander A. Klimov" Cc: Alexander Viro Cc: Alexandre Belloni Cc: Andrew Lunn Cc: Andrey Zhizhikin Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Brian Cain Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Chris Zankel Cc: Corentin Labbe Cc: "David S. Miller" Cc: "Eric W. Biederman" Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Greentime Hu Cc: Gregory Clement Cc: Heiko Carstens Cc: Helge Deller Cc: Hillf Danton Cc: huang ying Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: "James E.J. 
Bottomley" Cc: James Troup Cc: Jiaxun Yang Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Kairui Song Cc: Krzysztof Kozlowski Cc: Kuninori Morimoto Cc: Liviu Dudau Cc: Lorenzo Pieralisi Cc: Luc Van Oostenryck Cc: Luis Chamberlain Cc: Matthew Wilcox Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Mike Rapoport Cc: Mikulas Patocka Cc: Minchan Kim Cc: Niklas Schnelle Cc: Oleksiy Avramchenko Cc: openrisc@lists.librecores.org Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: "Pavel Machek (CIP)" Cc: Pavel Machek Cc: "Peter Zijlstra (Intel)" Cc: Pierre Morel Cc: Randy Dunlap Cc: Richard Henderson Cc: Rich Felker Cc: Robert Richter Cc: Rob Herring Cc: Russell King Cc: Sam Ravnborg Cc: Sebastian Andrzej Siewior Cc: Sebastian Hesselbarth Cc: sparclinux@vger.kernel.org Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Steven Rostedt Cc: Sudeep Holla Cc: Theodore Dubois Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Viresh Kumar Cc: William Cohen Cc: Xiaoming Ni Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/devices.txt | 2 +- arch/arm/configs/dove_defconfig | 1 - arch/arm/configs/magician_defconfig | 1 - arch/arm/configs/moxart_defconfig | 1 - arch/arm/configs/mps2_defconfig | 1 - arch/arm/configs/mvebu_v5_defconfig | 1 - arch/arm/configs/xcep_defconfig | 1 - arch/hexagon/configs/comet_defconfig | 1 - arch/m68k/configs/amcore_defconfig | 1 - arch/openrisc/configs/or1ksim_defconfig | 1 - arch/sh/configs/edosk7705_defconfig | 1 - arch/sh/configs/se7206_defconfig | 1 - arch/sh/configs/sh2007_defconfig | 1 - arch/sh/configs/sh7724_generic_defconfig | 1 - arch/sh/configs/sh7770_generic_defconfig | 1 - arch/sh/configs/sh7785lcr_32bit_defconfig | 1 - arch/sparc/configs/sparc64_defconfig | 1 - arch/xtensa/configs/xip_kc705_defconfig | 1 - drivers/char/Kconfig | 10 -- drivers/char/mem.c | 231 ------------------------------ include/linux/fs.h | 2 +- include/linux/vmalloc.h | 2 +- kernel/configs/android-base.config 
| 1 - mm/ksm.c | 2 +- mm/vmalloc.c | 2 +- 25 files changed, 5 insertions(+), 264 deletions(-) (limited to 'kernel') diff --git a/Documentation/admin-guide/devices.txt b/Documentation/admin-guide/devices.txt index ef41f77cb979..9c2be821c225 100644 --- a/Documentation/admin-guide/devices.txt +++ b/Documentation/admin-guide/devices.txt @@ -4,7 +4,7 @@ 1 char Memory devices 1 = /dev/mem Physical memory access - 2 = /dev/kmem Kernel virtual memory access + 2 = /dev/kmem OBSOLETE - replaced by /proc/kcore 3 = /dev/null Null device 4 = /dev/port I/O port access 5 = /dev/zero Null byte source diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index e70c997d5f4c..b935162a8bba 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -63,7 +63,6 @@ CONFIG_INPUT_EVDEV=y # CONFIG_MOUSE_PS2 is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=16 -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_RUNTIME_UARTS=2 diff --git a/arch/arm/configs/magician_defconfig b/arch/arm/configs/magician_defconfig index b4670d42f378..abde1fb23b20 100644 --- a/arch/arm/configs/magician_defconfig +++ b/arch/arm/configs/magician_defconfig @@ -72,7 +72,6 @@ CONFIG_INPUT_TOUCHSCREEN=y CONFIG_INPUT_MISC=y CONFIG_INPUT_UINPUT=m # CONFIG_SERIO is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_PXA=y # CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig index 6834e97af348..eacc089d86c5 100644 --- a/arch/arm/configs/moxart_defconfig +++ b/arch/arm/configs/moxart_defconfig @@ -79,7 +79,6 @@ CONFIG_INPUT_EVBUG=y # CONFIG_SERIO is not set # CONFIG_VT is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arm/configs/mps2_defconfig b/arch/arm/configs/mps2_defconfig index 
1d923dbb9928..89f4a6ff30bd 100644 --- a/arch/arm/configs/mps2_defconfig +++ b/arch/arm/configs/mps2_defconfig @@ -69,7 +69,6 @@ CONFIG_SMSC911X=y # CONFIG_VT is not set # CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_NONSTANDARD=y -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_MPS2_UART_CONSOLE=y CONFIG_SERIAL_MPS2_UART=y # CONFIG_HW_RANDOM is not set diff --git a/arch/arm/configs/mvebu_v5_defconfig b/arch/arm/configs/mvebu_v5_defconfig index 4f16716bfc32..d57ff30dabff 100644 --- a/arch/arm/configs/mvebu_v5_defconfig +++ b/arch/arm/configs/mvebu_v5_defconfig @@ -100,7 +100,6 @@ CONFIG_INPUT_EVDEV=y CONFIG_KEYBOARD_GPIO=y # CONFIG_INPUT_MOUSE is not set CONFIG_LEGACY_PTY_COUNT=16 -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_RUNTIME_UARTS=2 diff --git a/arch/arm/configs/xcep_defconfig b/arch/arm/configs/xcep_defconfig index f1fbdfc5c8c6..4d8e7f2eaef7 100644 --- a/arch/arm/configs/xcep_defconfig +++ b/arch/arm/configs/xcep_defconfig @@ -53,7 +53,6 @@ CONFIG_NET_ETHERNET=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_PXA=y CONFIG_SERIAL_PXA_CONSOLE=y # CONFIG_LEGACY_PTYS is not set diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig index f19ae2ab0aaa..c5a214716a38 100644 --- a/arch/hexagon/configs/comet_defconfig +++ b/arch/hexagon/configs/comet_defconfig @@ -34,7 +34,6 @@ CONFIG_NET_ETHERNET=y # CONFIG_SERIO is not set # CONFIG_CONSOLE_TRANSLATIONS is not set CONFIG_LEGACY_PTY_COUNT=64 -# CONFIG_DEVKMEM is not set # CONFIG_HW_RANDOM is not set CONFIG_SPI=y CONFIG_SPI_DEBUG=y diff --git a/arch/m68k/configs/amcore_defconfig b/arch/m68k/configs/amcore_defconfig index 3a84f24d41c8..6d9ed2198170 100644 --- a/arch/m68k/configs/amcore_defconfig +++ b/arch/m68k/configs/amcore_defconfig @@ -60,7 +60,6 @@ CONFIG_DM9000=y # CONFIG_VT is not set # CONFIG_UNIX98_PTYS is not set # CONFIG_DEVMEM is not set -# 
CONFIG_DEVKMEM is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_BAUDRATE=115200 CONFIG_SERIAL_MCF_CONSOLE=y diff --git a/arch/openrisc/configs/or1ksim_defconfig b/arch/openrisc/configs/or1ksim_defconfig index 75f2da324d0e..6e1e004047c7 100644 --- a/arch/openrisc/configs/or1ksim_defconfig +++ b/arch/openrisc/configs/or1ksim_defconfig @@ -43,7 +43,6 @@ CONFIG_MICREL_PHY=y # CONFIG_SERIO is not set # CONFIG_VT is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig index ef7cc31997b1..9ee35269bee2 100644 --- a/arch/sh/configs/edosk7705_defconfig +++ b/arch/sh/configs/edosk7705_defconfig @@ -23,7 +23,6 @@ CONFIG_SH_PCLK_FREQ=31250000 # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set -# CONFIG_DEVKMEM is not set # CONFIG_UNIX98_PTYS is not set # CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 315b04a8dd2f..601d062250d1 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -71,7 +71,6 @@ CONFIG_SMC91X=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_NR_UARTS=4 CONFIG_SERIAL_SH_SCI_CONSOLE=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index 99975db461d8..79f02f1c0dc8 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -75,7 +75,6 @@ CONFIG_INPUT_FF_MEMLESS=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_VT_HW_CONSOLE_BINDING=y -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_LEGACY_PTYS is not set diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig index 
2c46c0004780..cbc9389a89a8 100644 --- a/arch/sh/configs/sh7724_generic_defconfig +++ b/arch/sh/configs/sh7724_generic_defconfig @@ -18,7 +18,6 @@ CONFIG_CPU_IDLE=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_NR_UARTS=6 CONFIG_SERIAL_SH_SCI_CONSOLE=y diff --git a/arch/sh/configs/sh7770_generic_defconfig b/arch/sh/configs/sh7770_generic_defconfig index 88193153e51b..ee2357deba0f 100644 --- a/arch/sh/configs/sh7770_generic_defconfig +++ b/arch/sh/configs/sh7770_generic_defconfig @@ -20,7 +20,6 @@ CONFIG_CPU_IDLE=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_NR_UARTS=6 CONFIG_SERIAL_SH_SCI_CONSOLE=y diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index 9b885c14c400..5c725c75fcef 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -66,7 +66,6 @@ CONFIG_INPUT_FF_MEMLESS=m CONFIG_INPUT_EVDEV=y CONFIG_INPUT_EVBUG=m CONFIG_VT_HW_CONSOLE_BINDING=y -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_NR_UARTS=6 CONFIG_SERIAL_SH_SCI_CONSOLE=y diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 12a4fb0bd52a..18099099583e 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -122,7 +122,6 @@ CONFIG_INPUT_SPARCSPKR=y # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_PCIPS2=m CONFIG_SERIO_RAW=m -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_SUNSU=y CONFIG_SERIAL_SUNSU_CONSOLE=y CONFIG_SERIAL_SUNSAB=y diff --git a/arch/xtensa/configs/xip_kc705_defconfig b/arch/xtensa/configs/xip_kc705_defconfig index 4f1ff9531f6a..062148e17135 100644 --- a/arch/xtensa/configs/xip_kc705_defconfig +++ b/arch/xtensa/configs/xip_kc705_defconfig @@ -72,7 +72,6 @@ CONFIG_MARVELL_PHY=y # 
CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set -CONFIG_DEVKMEM=y CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index d229a2d0c017..b151e0fcdeb5 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -334,16 +334,6 @@ config DEVMEM memory. When in doubt, say "Y". -config DEVKMEM - bool "/dev/kmem virtual device support" - # On arm64, VMALLOC_START < PAGE_OFFSET, which confuses kmem read/write - depends on !ARM64 - help - Say Y here if you want to support the /dev/kmem device. The - /dev/kmem device is rarely used, but can be used for certain - kind of kernel debugging operations. - When in doubt, say "N". - config NVRAM tristate "/dev/nvram support" depends on X86 || HAVE_ARCH_NVRAM_OPS diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 869b9f5e8e03..15dc54fa1d47 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -403,221 +403,6 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma) return 0; } -static int mmap_kmem(struct file *file, struct vm_area_struct *vma) -{ - unsigned long pfn; - - /* Turn a kernel-virtual address into a physical page frame */ - pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT; - - /* - * RED-PEN: on some architectures there is more mapped memory than - * available in mem_map which pfn_valid checks for. Perhaps should add a - * new macro here. - * - * RED-PEN: vmalloc is not supported right now. - */ - if (!pfn_valid(pfn)) - return -EIO; - - vma->vm_pgoff = pfn; - return mmap_mem(file, vma); -} - -/* - * This function reads the *virtual* memory as seen by the kernel. 
- */ -static ssize_t read_kmem(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - unsigned long p = *ppos; - ssize_t low_count, read, sz; - char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ - int err = 0; - - read = 0; - if (p < (unsigned long) high_memory) { - low_count = count; - if (count > (unsigned long)high_memory - p) - low_count = (unsigned long)high_memory - p; - -#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED - /* we don't have page 0 mapped on sparc and m68k.. */ - if (p < PAGE_SIZE && low_count > 0) { - sz = size_inside_page(p, low_count); - if (clear_user(buf, sz)) - return -EFAULT; - buf += sz; - p += sz; - read += sz; - low_count -= sz; - count -= sz; - } -#endif - while (low_count > 0) { - sz = size_inside_page(p, low_count); - - /* - * On ia64 if a page has been mapped somewhere as - * uncached, then it must also be accessed uncached - * by the kernel or data corruption may occur - */ - kbuf = xlate_dev_kmem_ptr((void *)p); - if (!virt_addr_valid(kbuf)) - return -ENXIO; - - if (copy_to_user(buf, kbuf, sz)) - return -EFAULT; - buf += sz; - p += sz; - read += sz; - low_count -= sz; - count -= sz; - if (should_stop_iteration()) { - count = 0; - break; - } - } - } - - if (count > 0) { - kbuf = (char *)__get_free_page(GFP_KERNEL); - if (!kbuf) - return -ENOMEM; - while (count > 0) { - sz = size_inside_page(p, count); - if (!is_vmalloc_or_module_addr((void *)p)) { - err = -ENXIO; - break; - } - sz = vread(kbuf, (char *)p, sz); - if (!sz) - break; - if (copy_to_user(buf, kbuf, sz)) { - err = -EFAULT; - break; - } - count -= sz; - buf += sz; - read += sz; - p += sz; - if (should_stop_iteration()) - break; - } - free_page((unsigned long)kbuf); - } - *ppos = p; - return read ? 
read : err; -} - - -static ssize_t do_write_kmem(unsigned long p, const char __user *buf, - size_t count, loff_t *ppos) -{ - ssize_t written, sz; - unsigned long copied; - - written = 0; -#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED - /* we don't have page 0 mapped on sparc and m68k.. */ - if (p < PAGE_SIZE) { - sz = size_inside_page(p, count); - /* Hmm. Do something? */ - buf += sz; - p += sz; - count -= sz; - written += sz; - } -#endif - - while (count > 0) { - void *ptr; - - sz = size_inside_page(p, count); - - /* - * On ia64 if a page has been mapped somewhere as uncached, then - * it must also be accessed uncached by the kernel or data - * corruption may occur. - */ - ptr = xlate_dev_kmem_ptr((void *)p); - if (!virt_addr_valid(ptr)) - return -ENXIO; - - copied = copy_from_user(ptr, buf, sz); - if (copied) { - written += sz - copied; - if (written) - break; - return -EFAULT; - } - buf += sz; - p += sz; - count -= sz; - written += sz; - if (should_stop_iteration()) - break; - } - - *ppos += written; - return written; -} - -/* - * This function writes to the *virtual* memory as seen by the kernel. - */ -static ssize_t write_kmem(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - unsigned long p = *ppos; - ssize_t wrote = 0; - ssize_t virtr = 0; - char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ - int err = 0; - - if (p < (unsigned long) high_memory) { - unsigned long to_write = min_t(unsigned long, count, - (unsigned long)high_memory - p); - wrote = do_write_kmem(p, buf, to_write, ppos); - if (wrote != to_write) - return wrote; - p += wrote; - buf += wrote; - count -= wrote; - } - - if (count > 0) { - kbuf = (char *)__get_free_page(GFP_KERNEL); - if (!kbuf) - return wrote ? 
wrote : -ENOMEM; - while (count > 0) { - unsigned long sz = size_inside_page(p, count); - unsigned long n; - - if (!is_vmalloc_or_module_addr((void *)p)) { - err = -ENXIO; - break; - } - n = copy_from_user(kbuf, buf, sz); - if (n) { - err = -EFAULT; - break; - } - vwrite(kbuf, (char *)p, sz); - count -= sz; - buf += sz; - virtr += sz; - p += sz; - if (should_stop_iteration()) - break; - } - free_page((unsigned long)kbuf); - } - - *ppos = p; - return virtr + wrote ? : err; -} - static ssize_t read_port(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -855,7 +640,6 @@ static int open_port(struct inode *inode, struct file *filp) #define write_zero write_null #define write_iter_zero write_iter_null #define open_mem open_port -#define open_kmem open_mem static const struct file_operations __maybe_unused mem_fops = { .llseek = memory_lseek, @@ -869,18 +653,6 @@ static const struct file_operations __maybe_unused mem_fops = { #endif }; -static const struct file_operations __maybe_unused kmem_fops = { - .llseek = memory_lseek, - .read = read_kmem, - .write = write_kmem, - .mmap = mmap_kmem, - .open = open_kmem, -#ifndef CONFIG_MMU - .get_unmapped_area = get_unmapped_area_mem, - .mmap_capabilities = memory_mmap_capabilities, -#endif -}; - static const struct file_operations null_fops = { .llseek = null_lseek, .read = read_null, @@ -924,9 +696,6 @@ static const struct memdev { } devlist[] = { #ifdef CONFIG_DEVMEM [DEVMEM_MINOR] = { "mem", 0, &mem_fops, FMODE_UNSIGNED_OFFSET }, -#endif -#ifdef CONFIG_DEVKMEM - [2] = { "kmem", 0, &kmem_fops, FMODE_UNSIGNED_OFFSET }, #endif [3] = { "null", 0666, &null_fops, 0 }, #ifdef CONFIG_DEVPORT diff --git a/include/linux/fs.h b/include/linux/fs.h index acef282b97c6..c3c88fdb9b2a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -145,7 +145,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)0x1000) -/* File is huge (eg. 
/dev/kmem): treat loff_t as unsigned */ +/* File is huge (eg. /dev/mem): treat loff_t as unsigned */ #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) /* File is opened with O_PATH; almost nothing can be done with it */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 394d03cc0e92..f31ba59fb1ef 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -227,7 +227,7 @@ static inline void set_vm_flush_reset_perms(void *addr) } #endif -/* for /dev/kmem */ +/* for /proc/kcore */ extern long vread(char *buf, char *addr, unsigned long count); extern long vwrite(char *buf, char *addr, unsigned long count); diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config index d3fd428f4b92..eb701b2ac72f 100644 --- a/kernel/configs/android-base.config +++ b/kernel/configs/android-base.config @@ -1,5 +1,4 @@ # KEEP ALPHABETICALLY SORTED -# CONFIG_DEVKMEM is not set # CONFIG_DEVMEM is not set # CONFIG_FHANDLE is not set # CONFIG_INET_LRO is not set diff --git a/mm/ksm.c b/mm/ksm.c index b321a67ebaa9..b7cbcc7d4977 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -459,7 +459,7 @@ static inline bool ksm_test_exit(struct mm_struct *mm) * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma, * in case the application has unmapped and remapped mm,addr meanwhile. * Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP - * mmap of /dev/mem or /dev/kmem, where we would not want to touch it. + * mmap of /dev/mem, where we would not want to touch it. * * FAULT_FLAG/FOLL_REMOTE are because we do this outside the context * of the process that owns 'vma'. 
We also do not want to enforce diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9c539f0730a5..2868692c6807 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3219,7 +3219,7 @@ static int aligned_vwrite(char *buf, char *addr, unsigned long count) * Note: In usual ops, vread() is never necessary because the caller * should know vmalloc() area is valid and can use memcpy(). * This is for routines which have to access vmalloc area without - * any information, as /dev/kmem. + * any information, as /proc/kcore. * * Return: number of bytes for which addr and buf should be increased * (same number as @count) or %0 if [addr...addr+count) doesn't -- cgit v1.2.3 From 48207f7d41c8bdae94d2aae11620ed76fee95d45 Mon Sep 17 00:00:00 2001 From: zhouchuangao Date: Thu, 6 May 2021 18:06:27 -0700 Subject: kernel/umh.c: fix some spelling mistakes Fix some spelling mistakes, and modify the order of the parameter comments to be consistent with the order of the parameters passed to the function. Link: https://lkml.kernel.org/r/1615636139-4076-1-git-send-email-zhouchuangao@vivo.com Signed-off-by: zhouchuangao Acked-by: Luis Chamberlain Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/umh.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/umh.c b/kernel/umh.c index 61f6b82c354b..36c123360ab8 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -338,8 +338,8 @@ static void helper_unlock(void) * @argv: arg vector for process * @envp: environment for process * @gfp_mask: gfp mask for memory allocation - * @cleanup: a cleanup function * @init: an init function + * @cleanup: a cleanup function * @data: arbitrary context sensitive data * * Returns either %NULL on allocation failure, or a subprocess_info @@ -350,7 +350,7 @@ static void helper_unlock(void) * exec. 
A non-zero return code causes the process to error out, exit, * and return the failure to the calling process * - * The cleanup function is just before ethe subprocess_info is about to + * The cleanup function is just before the subprocess_info is about to * be freed. This can be used for freeing the argv and envp. The * Function must be runnable in either a process context or the * context in which call_usermodehelper_exec is called. @@ -386,7 +386,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup); /** * call_usermodehelper_exec - start a usermode application - * @sub_info: information about the subprocessa + * @sub_info: information about the subprocess * @wait: wait for the application to finish and return status. * when UMH_NO_WAIT don't wait at all, but you get no useful error back * when the program couldn't be exec'ed. This makes it safe to call -- cgit v1.2.3 From a12f4f85bc5a70ff5b74a274d3074f12e1122913 Mon Sep 17 00:00:00 2001 From: Xiaofeng Cao Date: Thu, 6 May 2021 18:06:30 -0700 Subject: kernel/user_namespace.c: fix typos change 'verifing' to 'verifying' change 'certaint' to 'certain' change 'approprpiate' to 'appropriate' Link: https://lkml.kernel.org/r/20210317100129.12440-1-caoxiaofeng@yulong.com Signed-off-by: Xiaofeng Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/user_namespace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 9a4b980d695b..8d62863721b0 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -85,7 +85,7 @@ int create_user_ns(struct cred *new) /* * Verify that we can not violate the policy of which files * may be accessed that is specified by the root directory, - * by verifing that the root directory is at the root of the + * by verifying that the root directory is at the root of the * mount namespace which allows all files to be accessed. 
*/ ret = -EPERM; @@ -1014,7 +1014,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, goto out; ret = -EINVAL; } - /* Be very certaint the new map actually exists */ + /* Be very certain the new map actually exists */ if (new_map.nr_extents == 0) goto out; @@ -1169,7 +1169,7 @@ static bool new_idmap_permitted(const struct file *file, /* Allow the specified ids if we have the appropriate capability * (CAP_SETUID or CAP_SETGID) over the parent user namespace. - * And the opener of the id file also had the approprpiate capability. + * And the opener of the id file also has the appropriate capability. */ if (ns_capable(ns->parent, cap_setid) && file_ns_capable(file, ns->parent, cap_setid)) -- cgit v1.2.3 From f0fffaff0b8960c9a110211510269744af1f1d1e Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Thu, 6 May 2021 18:06:33 -0700 Subject: kernel/up.c: fix typo s/condtions/conditions/ Link: https://lkml.kernel.org/r/20210317032732.3260835-1-unixbhaskar@gmail.com Signed-off-by: Bhaskar Chowdhury Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/up.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/up.c b/kernel/up.c index bf20b4a9af60..df50828cc2f0 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -38,7 +38,7 @@ EXPORT_SYMBOL(smp_call_function_single_async); /* * Preemption is disabled here to make sure the cond_func is called under the - * same condtions in UP and SMP. + * same conditions in UP and SMP. 
*/ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, void *info, bool wait, const struct cpumask *mask) -- cgit v1.2.3 From 5afe69c2ccd069112fd299b573d30d6b14528b6c Mon Sep 17 00:00:00 2001 From: Xiaofeng Cao Date: Thu, 6 May 2021 18:06:36 -0700 Subject: kernel/sys.c: fix typo change 'infite' to 'infinite' change 'concurent' to 'concurrent' change 'memvers' to 'members' change 'decendants' to 'descendants' change 'argumets' to 'arguments' Link: https://lkml.kernel.org/r/20210316112904.10661-1-cxfcosmos@gmail.com Signed-off-by: Xiaofeng Cao Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 3d62c9599dc0..3a583a29815f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1590,7 +1590,7 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, /* * RLIMIT_CPU handling. Arm the posix CPU timer if the limit is not - * infite. In case of RLIM_INFINITY the posix CPU timer code + * infinite. In case of RLIM_INFINITY the posix CPU timer code * ignores the rlimit. */ if (!retval && new_rlim && resource == RLIMIT_CPU && @@ -2029,7 +2029,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data } /* - * arg_lock protects concurent updates but we still need mmap_lock for + * arg_lock protects concurrent updates but we still need mmap_lock for * read to exclude races with sys_brk. 
*/ mmap_read_lock(mm); @@ -2041,7 +2041,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data * output in procfs mostly, except * * - @start_brk/@brk which are used in do_brk_flags but kernel lookups - * for VMAs when updating these memvers so anything wrong written + * for VMAs when updating these members so anything wrong written * here cause kernel to swear at userspace program but won't lead * to any problem in kernel itself */ @@ -2143,7 +2143,7 @@ static int prctl_set_mm(int opt, unsigned long addr, error = -EINVAL; /* - * arg_lock protects concurent updates of arg boundaries, we need + * arg_lock protects concurrent updates of arg boundaries, we need * mmap_lock for a) concurrent sys_brk, b) finding VMA for addr * validation. */ @@ -2210,7 +2210,7 @@ static int prctl_set_mm(int opt, unsigned long addr, * If command line arguments and environment * are placed somewhere else on stack, we can * set them up here, ARG_START/END to setup - * command line argumets and ENV_START/END + * command line arguments and ENV_START/END * for environment. */ case PR_SET_MM_START_STACK: @@ -2258,8 +2258,8 @@ static int prctl_get_tid_address(struct task_struct *me, int __user * __user *ti static int propagate_has_child_subreaper(struct task_struct *p, void *data) { /* - * If task has has_child_subreaper - all its decendants - * already have these flag too and new decendants will + * If task has has_child_subreaper - all its descendants + * already have these flag too and new descendants will * inherit it on fork, skip them. * * If we've found child_reaper - skip descendants in -- cgit v1.2.3