Diffstat (limited to 'kernel')
60 files changed, 1344 insertions, 1016 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index aa7368c7eabf..320f1f3941b7 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -51,7 +51,7 @@ obj-y += livepatch/ obj-y += dma/ obj-y += entry/ -obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o +obj-$(CONFIG_KCMP) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/kernel/audit.c b/kernel/audit.c index 1ffc2e059027..551a394bc8f4 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -2285,7 +2285,7 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, uid = from_kuid(&init_user_ns, task_uid(current)); oldloginuid = from_kuid(&init_user_ns, koldloginuid); - loginuid = from_kuid(&init_user_ns, kloginuid), + loginuid = from_kuid(&init_user_ns, kloginuid); tty = audit_get_tty(); audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid); @@ -2365,7 +2365,7 @@ int audit_signal_info(int sig, struct task_struct *t) * * We can not do a netlink send inside an irq context because it blocks (last * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a - * queue and a tasklet is scheduled to remove them from the queue outside the + * queue and a kthread is scheduled to remove them from the queue outside the * irq context. May be called in any context. */ void audit_log_end(struct audit_buffer *ab) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ce8c9e2279ba..47fb48f42c93 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -799,12 +799,12 @@ static int audit_in_mask(const struct audit_krule *rule, unsigned long val) return rule->mask[word] & bit; } -/* At syscall entry and exit time, this filter is called if the - * audit_state is not low enough that auditing cannot take place, but is - * also not high enough that we already know we have to write an audit - * record (i.e., the state is AUDIT_SETUP_CONTEXT or AUDIT_BUILD_CONTEXT). +/* At syscall exit time, this filter is called if the audit_state is + * not low enough that auditing cannot take place, but is also not + * high enough that we already know we have to write an audit record + * (i.e., the state is AUDIT_SETUP_CONTEXT or AUDIT_BUILD_CONTEXT). 
*/ -static enum audit_state audit_filter_syscall(struct task_struct *tsk, +static void audit_filter_syscall(struct task_struct *tsk, struct audit_context *ctx, struct list_head *list) { @@ -812,7 +812,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, enum audit_state state; if (auditd_test_task(tsk)) - return AUDIT_DISABLED; + return; rcu_read_lock(); list_for_each_entry_rcu(e, list, list) { @@ -821,11 +821,11 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, &state, false)) { rcu_read_unlock(); ctx->current_state = state; - return state; + return; } } rcu_read_unlock(); - return AUDIT_BUILD_CONTEXT; + return; } /* @@ -1930,7 +1930,7 @@ static inline int audit_copy_fcaps(struct audit_names *name, if (!dentry) return 0; - rc = get_vfs_caps_from_disk(dentry, &caps); + rc = get_vfs_caps_from_disk(&init_user_ns, dentry, &caps); if (rc) return rc; @@ -2481,7 +2481,8 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, ax->d.next = context->aux; context->aux = (void *)ax; - get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); + get_vfs_caps_from_disk(&init_user_ns, + bprm->file->f_path.dentry, &vcaps); ax->fcap.permitted = vcaps.permitted; ax->fcap.inheritable = vcaps.inheritable; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index dd4b7fd60ee7..1576ff331ee4 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -122,7 +122,7 @@ static struct inode *bpf_get_inode(struct super_block *sb, inode->i_mtime = inode->i_atime; inode->i_ctime = inode->i_atime; - inode_init_owner(inode, dir, mode); + inode_init_owner(&init_user_ns, inode, dir, mode); return inode; } @@ -152,7 +152,8 @@ static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, dir->i_ctime = dir->i_mtime; } -static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +static int bpf_mkdir(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct inode *inode; @@ -381,8 +382,8 @@ bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) return simple_lookup(dir, dentry, flags); } -static int bpf_symlink(struct inode *dir, struct dentry *dentry, - const char *target) +static int bpf_symlink(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, const char *target) { char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); struct inode *inode; @@ -507,7 +508,7 @@ static void *bpf_obj_do_get(const char __user *pathname, return ERR_PTR(ret); inode = d_backing_inode(path.dentry); - ret = inode_permission(inode, ACC_MODE(flags)); + ret = path_permission(&path, ACC_MODE(flags)); if (ret) goto out; @@ -558,7 +559,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags) static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) { struct bpf_prog *prog; - int ret = inode_permission(inode, MAY_READ); + int ret = inode_permission(&init_user_ns, inode, MAY_READ); if (ret) return ERR_PTR(ret); diff --git a/kernel/capability.c b/kernel/capability.c index de7eac903a2a..46a361dde042 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -484,10 +484,12 @@ EXPORT_SYMBOL(file_ns_capable); * * Return true if the inode uid and gid are within the namespace. 
*/ -bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode) +bool privileged_wrt_inode_uidgid(struct user_namespace *ns, + struct user_namespace *mnt_userns, + const struct inode *inode) { - return kuid_has_mapping(ns, inode->i_uid) && - kgid_has_mapping(ns, inode->i_gid); + return kuid_has_mapping(ns, i_uid_into_mnt(mnt_userns, inode)) && + kgid_has_mapping(ns, i_gid_into_mnt(mnt_userns, inode)); } /** @@ -499,11 +501,13 @@ bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode * * its own user namespace and that the given inode's uid and gid are * mapped into the current user namespace. */ -bool capable_wrt_inode_uidgid(const struct inode *inode, int cap) +bool capable_wrt_inode_uidgid(struct user_namespace *mnt_userns, + const struct inode *inode, int cap) { struct user_namespace *ns = current_user_ns(); - return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode); + return ns_capable(ns, cap) && + privileged_wrt_inode_uidgid(ns, mnt_userns, inode); } EXPORT_SYMBOL(capable_wrt_inode_uidgid); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1ea995f801ec..9153b20e5cc6 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4672,7 +4672,7 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb) if (!inode) return -ENOMEM; - ret = inode_permission(inode, MAY_WRITE); + ret = inode_permission(&init_user_ns, inode, MAY_WRITE); iput(inode); return ret; } @@ -4728,8 +4728,8 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp, return ret; } -static ssize_t cgroup_procs_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) +static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, + bool threadgroup) { struct cgroup *src_cgrp, *dst_cgrp; struct task_struct *task; @@ -4740,7 +4740,7 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of, if (!dst_cgrp) return -ENODEV; - task = cgroup_procs_write_start(buf, true, &locked); + task = cgroup_procs_write_start(buf, threadgroup, &locked); ret = PTR_ERR_OR_ZERO(task); if (ret) goto out_unlock; @@ -4750,19 +4750,26 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of, src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); spin_unlock_irq(&css_set_lock); + /* process and thread migrations follow same delegation rule */ ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, - of->file->f_path.dentry->d_sb, true); + of->file->f_path.dentry->d_sb, threadgroup); if (ret) goto out_finish; - ret = cgroup_attach_task(dst_cgrp, task, true); + ret = cgroup_attach_task(dst_cgrp, task, threadgroup); out_finish: cgroup_procs_write_finish(task, locked); out_unlock: cgroup_kn_unlock(of->kn); - return ret ?: nbytes; + return ret; +} + +static ssize_t cgroup_procs_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + return __cgroup_procs_write(of, buf, true) ?: nbytes; } static void *cgroup_threads_start(struct seq_file *s, loff_t *pos) @@ -4773,41 +4780,7 @@ static void *cgroup_threads_start(struct seq_file *s, loff_t *pos) static ssize_t cgroup_threads_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct cgroup *src_cgrp, *dst_cgrp; - struct task_struct *task; - ssize_t ret; - bool locked; - - buf = strstrip(buf); - - dst_cgrp = cgroup_kn_lock_live(of->kn, false); - if (!dst_cgrp) - return -ENODEV; - - task = cgroup_procs_write_start(buf, false, &locked); - ret = PTR_ERR_OR_ZERO(task); - if (ret) - goto out_unlock; - - /* 
find the source cgroup */ - spin_lock_irq(&css_set_lock); - src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); - spin_unlock_irq(&css_set_lock); - - /* thread migrations follow the cgroup.procs delegation rule */ - ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, - of->file->f_path.dentry->d_sb, false); - if (ret) - goto out_finish; - - ret = cgroup_attach_task(dst_cgrp, task, false); - -out_finish: - cgroup_procs_write_finish(task, locked); -out_unlock: - cgroup_kn_unlock(of->kn); - - return ret ?: nbytes; + return __cgroup_procs_write(of, buf, false) ?: nbytes; } /* cgroup core interface files for the default hierarchy */ diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 53c70c470a38..5258b68153e0 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -98,7 +98,7 @@ struct cpuset { * and if it ends up empty, it will inherit the parent's mask. * * - * On legacy hierachy: + * On legacy hierarchy: * * The user-configured masks are always the same with effective masks. */ @@ -1309,10 +1309,10 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, * @cs: the cpuset to consider * @tmp: temp variables for calculating effective_cpus & partition setup * - * When congifured cpumask is changed, the effective cpumasks of this cpuset + * When configured cpumask is changed, the effective cpumasks of this cpuset * and all its descendants need to be updated. * - * On legacy hierachy, effective_cpus will be the same with cpu_allowed. + * On legacy hierarchy, effective_cpus will be the same with cpu_allowed. * * Called with cpuset_mutex held */ diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index af6e8b4fb359..b636d517c02c 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -119,7 +119,6 @@ static DEFINE_RAW_SPINLOCK(dbg_slave_lock); */ static atomic_t masters_in_kgdb; static atomic_t slaves_in_kgdb; -static atomic_t kgdb_break_tasklet_var; atomic_t kgdb_setting_breakpoint; struct task_struct *kgdb_usethread; @@ -1084,31 +1083,6 @@ static void kgdb_unregister_callbacks(void) } } -/* - * There are times a tasklet needs to be used vs a compiled in - * break point so as to cause an exception outside a kgdb I/O module, - * such as is the case with kgdboe, where calling a breakpoint in the - * I/O driver itself would be fatal. - */ -static void kgdb_tasklet_bpt(unsigned long ing) -{ - kgdb_breakpoint(); - atomic_set(&kgdb_break_tasklet_var, 0); -} - -static DECLARE_TASKLET_OLD(kgdb_tasklet_breakpoint, kgdb_tasklet_bpt); - -void kgdb_schedule_breakpoint(void) -{ - if (atomic_read(&kgdb_break_tasklet_var) || - atomic_read(&kgdb_active) != -1 || - atomic_read(&kgdb_setting_breakpoint)) - return; - atomic_inc(&kgdb_break_tasklet_var); - tasklet_schedule(&kgdb_tasklet_breakpoint); -} -EXPORT_SYMBOL_GPL(kgdb_schedule_breakpoint); - /** * kgdb_register_io_module - register KGDB IO module * @new_dbg_io_ops: the io ops vector @@ -1166,7 +1140,7 @@ int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops) EXPORT_SYMBOL_GPL(kgdb_register_io_module); /** - * kkgdb_unregister_io_module - unregister KGDB IO module + * kgdb_unregister_io_module - unregister KGDB IO module * @old_dbg_io_ops: the io ops vector * * Unregister it with the KGDB core. 
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index a77df59d9ca5..e149a0ac9e9e 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -595,7 +595,7 @@ static char *gdb_hex_reg_helper(int regnum, char *out) dbg_reg_def[i].size); } -/* Handle the 'p' individual regster get */ +/* Handle the 'p' individual register get */ static void gdb_cmd_reg_get(struct kgdb_state *ks) { unsigned long regnum; @@ -610,7 +610,7 @@ static void gdb_cmd_reg_get(struct kgdb_state *ks) gdb_hex_reg_helper(regnum, remcom_out_buffer); } -/* Handle the 'P' individual regster set */ +/* Handle the 'P' individual register set */ static void gdb_cmd_reg_set(struct kgdb_state *ks) { unsigned long regnum; diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index a4281fb99299..6cb92f7bbbd0 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -230,7 +230,7 @@ extern struct task_struct *kdb_curr_task(int); #define kdb_task_has_cpu(p) (task_curr(p)) -#define GFP_KDB (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL) +#define GFP_KDB (in_dbg_master() ? GFP_ATOMIC : GFP_KERNEL) extern void *debug_kmalloc(size_t size, gfp_t flags); extern void debug_kfree(void *); @@ -254,4 +254,14 @@ extern char kdb_prompt_str[]; #define KDB_WORD_SIZE ((int)sizeof(unsigned long)) #endif /* CONFIG_KGDB_KDB */ + +#define kdb_func_printf(format, args...) \ + kdb_printf("%s: " format, __func__, ## args) + +#define kdb_dbg_printf(mask, format, args...) \ + do { \ + if (KDB_DEBUG(mask)) \ + kdb_func_printf(format, ## args); \ + } while (0) + #endif /* !_KDBPRIVATE_H */ diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 6226502ce049..f7c1885abeb6 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -39,20 +39,15 @@ */ int kdbgetsymval(const char *symname, kdb_symtab_t *symtab) { - if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: symname=%s, symtab=%px\n", symname, - symtab); + kdb_dbg_printf(AR, "symname=%s, symtab=%px\n", symname, symtab); memset(symtab, 0, sizeof(*symtab)); symtab->sym_start = kallsyms_lookup_name(symname); if (symtab->sym_start) { - if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: returns 1, " - "symtab->sym_start=0x%lx\n", - symtab->sym_start); + kdb_dbg_printf(AR, "returns 1, symtab->sym_start=0x%lx\n", + symtab->sym_start); return 1; } - if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: returns 0\n"); + kdb_dbg_printf(AR, "returns 0\n"); return 0; } EXPORT_SYMBOL(kdbgetsymval); @@ -87,16 +82,14 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) #define knt1_size 128 /* must be >= kallsyms table size */ char *knt1 = NULL; - if (KDB_DEBUG(AR)) - kdb_printf("kdbnearsym: addr=0x%lx, symtab=%px\n", addr, symtab); + kdb_dbg_printf(AR, "addr=0x%lx, symtab=%px\n", addr, symtab); memset(symtab, 0, sizeof(*symtab)); if (addr < 4096) goto out; knt1 = debug_kmalloc(knt1_size, GFP_ATOMIC); if (!knt1) { - kdb_printf("kdbnearsym: addr=0x%lx cannot kmalloc knt1\n", - addr); + kdb_func_printf("addr=0x%lx cannot kmalloc knt1\n", addr); goto out; } symtab->sym_name = kallsyms_lookup(addr, &symbolsize , &offset, @@ -147,11 +140,8 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) if (symtab->mod_name == NULL) symtab->mod_name = "kernel"; - if (KDB_DEBUG(AR)) - kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, " - "symtab->mod_name=%px, symtab->sym_name=%px (%s)\n", ret, - symtab->sym_start, symtab->mod_name, symtab->sym_name, - symtab->sym_name); + kdb_dbg_printf(AR, "returns %d 
symtab->sym_start=0x%lx, symtab->mod_name=%px, symtab->sym_name=%px (%s)\n", + ret, symtab->sym_start, symtab->mod_name, symtab->sym_name, symtab->sym_name); out: debug_kfree(knt1); @@ -328,7 +318,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size) int ret = copy_from_kernel_nofault((char *)res, (char *)addr, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { - kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr); + kdb_func_printf("Bad address 0x%lx\n", addr); KDB_STATE_SET(SUPPRESS); } ret = KDB_BADADDR; @@ -353,7 +343,7 @@ int kdb_putarea_size(unsigned long addr, void *res, size_t size) int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { - kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr); + kdb_func_printf("Bad address 0x%lx\n", addr); KDB_STATE_SET(SUPPRESS); } ret = KDB_BADADDR; @@ -435,7 +425,7 @@ int kdb_getphysword(unsigned long *word, unsigned long addr, size_t size) fallthrough; default: diag = KDB_BADWIDTH; - kdb_printf("kdb_getphysword: bad width %ld\n", (long) size); + kdb_func_printf("bad width %zu\n", size); } return diag; } @@ -484,7 +474,7 @@ int kdb_getword(unsigned long *word, unsigned long addr, size_t size) fallthrough; default: diag = KDB_BADWIDTH; - kdb_printf("kdb_getword: bad width %ld\n", (long) size); + kdb_func_printf("bad width %zu\n", size); } return diag; } @@ -528,7 +518,7 @@ int kdb_putword(unsigned long addr, unsigned long word, size_t size) fallthrough; default: diag = KDB_BADWIDTH; - kdb_printf("kdb_putword: bad width %ld\n", (long) size); + kdb_func_printf("bad width %zu\n", size); } return diag; } @@ -602,8 +592,7 @@ unsigned long kdb_task_state_string(const char *s) res = ~0UL; break; default: - kdb_printf("%s: unknown flag '%c' ignored\n", - __func__, *s); + kdb_func_printf("unknown flag '%c' ignored\n", *s); break; } ++s; @@ -884,18 +873,16 @@ void debug_kusage(void) if (!debug_kusage_one_time) goto out; debug_kusage_one_time = 0; - kdb_printf("%s: debug_kmalloc memory leak dah_first %d\n", - __func__, dah_first); + kdb_func_printf("debug_kmalloc memory leak dah_first %d\n", dah_first); if (dah_first) { h_used = (struct debug_alloc_header *)debug_alloc_pool; - kdb_printf("%s: h_used %px size %d\n", __func__, h_used, - h_used->size); + kdb_func_printf("h_used %px size %d\n", h_used, h_used->size); } do { h_used = (struct debug_alloc_header *) ((char *)h_free + dah_overhead + h_free->size); - kdb_printf("%s: h_used %px size %d caller %px\n", - __func__, h_used, h_used->size, h_used->caller); + kdb_func_printf("h_used %px size %d caller %px\n", + h_used, h_used->size, h_used->caller); h_free = (struct debug_alloc_header *) (debug_alloc_pool + h_free->next); } while (h_free->next); @@ -903,8 +890,8 @@ void debug_kusage(void) ((char *)h_free + dah_overhead + h_free->size); if ((char *)h_used - debug_alloc_pool != sizeof(debug_alloc_pool_aligned)) - kdb_printf("%s: h_used %px size %d caller %px\n", - __func__, h_used, h_used->size, h_used->caller); + kdb_func_printf("h_used %px size %d caller %px\n", + h_used, h_used->size, h_used->caller); out: spin_unlock(&dap_lock); } diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 479fc145acfc..77b405508743 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -33,9 +33,6 @@ config NEED_DMA_MAP_STATE config ARCH_DMA_ADDR_T_64BIT def_bool 64BIT || PHYS_ADDR_T_64BIT -config ARCH_HAS_DMA_COHERENCE_H - bool - config ARCH_HAS_DMA_SET_MASK bool diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index 
da95df381483..e0e64f8b0739 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -21,6 +21,7 @@ #define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) #define DMA_MAP_MAX_THREADS 1024 #define DMA_MAP_MAX_SECONDS 300 +#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC) #define DMA_MAP_BIDIRECTIONAL 0 #define DMA_MAP_TO_DEVICE 1 @@ -36,7 +37,8 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u8 expansion[84]; /* For future use */ + __u32 dma_trans_ns; /* time for DMA transmission in ns */ + __u8 expansion[80]; /* For future use */ }; struct map_benchmark_data { @@ -87,6 +89,9 @@ static int map_benchmark_thread(void *data) map_etime = ktime_get(); map_delta = ktime_sub(map_etime, map_stime); + /* Pretend DMA is transmitting */ + ndelay(map->bparam.dma_trans_ns); + unmap_stime = ktime_get(); dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir); unmap_etime = ktime_get(); @@ -218,6 +223,11 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, return -EINVAL; } + if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) { + pr_err("invalid transmission delay\n"); + return -EINVAL; + } + if (map->bparam.node != NUMA_NO_NODE && !node_possible(map->bparam.node)) { pr_err("invalid numa node\n"); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index f87a89d08654..b6a633679933 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -16,6 +16,8 @@ #include "debug.h" #include "direct.h" +bool dma_default_coherent; + /* * Managed DMA API */ @@ -515,46 +517,6 @@ void dma_free_pages(struct device *dev, size_t size, struct page *page, } EXPORT_SYMBOL_GPL(dma_free_pages); -void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - void *vaddr; - - if (!ops || !ops->alloc_noncoherent) { - struct page *page; - - page = dma_alloc_pages(dev, size, dma_handle, dir, gfp); - if (!page) - return NULL; - return page_address(page); - } - - size = PAGE_ALIGN(size); - vaddr = ops->alloc_noncoherent(dev, size, dma_handle, dir, gfp); - if (vaddr) - debug_dma_map_page(dev, virt_to_page(vaddr), 0, size, dir, - *dma_handle); - return vaddr; -} -EXPORT_SYMBOL_GPL(dma_alloc_noncoherent); - -void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (!ops || !ops->free_noncoherent) { - dma_free_pages(dev, size, virt_to_page(vaddr), dma_handle, dir); - return; - } - - size = PAGE_ALIGN(size); - debug_dma_unmap_page(dev, dma_handle, size, dir); - ops->free_noncoherent(dev, size, vaddr, dma_handle, dir); -} -EXPORT_SYMBOL_GPL(dma_free_noncoherent); - int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 6aacd342cd14..288151393a06 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -205,6 +205,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, } fwnode_handle_get(fwnode); + fwnode_dev_initialized(fwnode, true); /* Fill structure */ INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); @@ -253,6 +254,7 @@ void irq_domain_remove(struct irq_domain *domain) pr_debug("Removed domain %s\n", domain->name); + 
fwnode_dev_initialized(domain->fwnode, false); fwnode_handle_put(domain->fwnode); if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) kfree(domain->name); diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fe9de067771c..8043a90aa50e 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -177,6 +177,11 @@ unsigned long kallsyms_lookup_name(const char *name) return module_kallsyms_lookup_name(name); } +#ifdef CONFIG_LIVEPATCH +/* + * Iterate over all symbols in vmlinux. For symbols from modules use + * module_kallsyms_on_each_symbol instead. + */ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data) @@ -192,8 +197,9 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, if (ret != 0) return ret; } - return module_kallsyms_on_each_symbol(fn, data); + return 0; } +#endif /* CONFIG_LIVEPATCH */ static unsigned long get_symbol_pos(unsigned long addr, unsigned long *symbolsize, diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index b02086d70492..5c3447cf7ad5 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -166,6 +166,11 @@ void kimage_file_post_load_cleanup(struct kimage *image) vfree(pi->sechdrs); pi->sechdrs = NULL; +#ifdef CONFIG_IMA_KEXEC + vfree(image->ima_buffer); + image->ima_buffer = NULL; +#endif /* CONFIG_IMA_KEXEC */ + /* See if architecture has anything to cleanup post load */ arch_kimage_file_post_load_cleanup(image); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d5a3eb74a657..745f08fdd7a6 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -861,7 +861,6 @@ out: cpus_read_unlock(); } -#ifdef CONFIG_SYSCTL static void optimize_all_kprobes(void) { struct hlist_head *head; @@ -887,6 +886,7 @@ out: mutex_unlock(&kprobe_mutex); } +#ifdef CONFIG_SYSCTL static void unoptimize_all_kprobes(void) { struct hlist_head *head; @@ -1520,13 +1520,16 @@ valid: return ap; } -/* Return error if the kprobe is being re-registered */ -static inline int check_kprobe_rereg(struct kprobe *p) +/* + * Warn and return error if the kprobe is being re-registered since + * there must be a software bug. 
+ */ +static inline int warn_kprobe_rereg(struct kprobe *p) { int ret = 0; mutex_lock(&kprobe_mutex); - if (__get_valid_kprobe(p)) + if (WARN_ON_ONCE(__get_valid_kprobe(p))) ret = -EINVAL; mutex_unlock(&kprobe_mutex); @@ -1614,7 +1617,7 @@ int register_kprobe(struct kprobe *p) return PTR_ERR(addr); p->addr = addr; - ret = check_kprobe_rereg(p); + ret = warn_kprobe_rereg(p); if (ret) return ret; @@ -1995,7 +1998,7 @@ int register_kretprobe(struct kretprobe *rp) return ret; /* If only rp->kp.addr is specified, check reregistering kprobes */ - if (rp->kp.addr && check_kprobe_rereg(&rp->kp)) + if (rp->kp.addr && warn_kprobe_rereg(&rp->kp)) return -EINVAL; if (kretprobe_blacklist_size) { @@ -2497,18 +2500,14 @@ static int __init init_kprobes(void) } } -#if defined(CONFIG_OPTPROBES) -#if defined(__ARCH_WANT_KPROBES_INSN_SLOT) - /* Init kprobe_optinsn_slots */ - kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; -#endif - /* By default, kprobes can be optimized */ - kprobes_allow_optimization = true; -#endif - /* By default, kprobes are armed */ kprobes_all_disarmed = false; +#if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT) + /* Init kprobe_optinsn_slots for allocation */ + kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; +#endif + err = arch_init_kprobes(); if (!err) err = register_die_notifier(&kprobe_exceptions_nb); @@ -2523,6 +2522,21 @@ static int __init init_kprobes(void) } early_initcall(init_kprobes); +#if defined(CONFIG_OPTPROBES) +static int __init init_optprobes(void) +{ + /* + * Enable kprobe optimization - this kicks the optimizer which + * depends on synchronize_rcu_tasks() and ksoftirqd, that is + * not spawned in early initcall. So delay the optimization. + */ + optimize_all_kprobes(); + + return 0; +} +subsys_initcall(init_optprobes); +#endif + #ifdef CONFIG_DEBUG_FS static void report_probe(struct seq_file *pi, struct kprobe *p, const char *sym, int offset, char *modname, struct kprobe *pp) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index f76fdb925532..335d988bd811 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -19,6 +19,7 @@ #include <linux/moduleloader.h> #include <linux/completion.h> #include <linux/memory.h> +#include <linux/rcupdate.h> #include <asm/cacheflush.h> #include "core.h" #include "patch.h" @@ -57,7 +58,7 @@ static void klp_find_object_module(struct klp_object *obj) if (!klp_is_module(obj)) return; - mutex_lock(&module_mutex); + rcu_read_lock_sched(); /* * We do not want to block removal of patched modules and therefore * we do not take a reference here. The patches are removed by @@ -74,7 +75,7 @@ static void klp_find_object_module(struct klp_object *obj) if (mod && mod->klp_alive) obj->mod = mod; - mutex_unlock(&module_mutex); + rcu_read_unlock_sched(); } static bool klp_initialized(void) @@ -163,12 +164,10 @@ static int klp_find_object_symbol(const char *objname, const char *name, .pos = sympos, }; - mutex_lock(&module_mutex); if (objname) module_kallsyms_on_each_symbol(klp_find_callback, &args); else kallsyms_on_each_symbol(klp_find_callback, &args); - mutex_unlock(&module_mutex); /* * Ensure an address was found. 
If sympos is 0, ensure symbol is unique; diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index fe9ca92faa2a..4786dd271b45 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -12,7 +12,6 @@ #include <linux/percpu.h> #include <linux/hardirq.h> #include <linux/spinlock.h> -#include <asm/qrwlock.h> /** * queued_read_lock_slowpath - acquire read lock of a queue rwlock diff --git a/kernel/module.c b/kernel/module.c index 4bf30e4b3eaa..30479355ab85 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -87,8 +87,7 @@ * 3) module_addr_min/module_addr_max. * (delete and add uses RCU list operations). */ -DEFINE_MUTEX(module_mutex); -EXPORT_SYMBOL_GPL(module_mutex); +static DEFINE_MUTEX(module_mutex); static LIST_HEAD(modules); /* Work queue for freeing init sections in success case */ @@ -256,11 +255,6 @@ static void mod_update_bounds(struct module *mod) struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ #endif /* CONFIG_KGDB_KDB */ -static void module_assert_mutex(void) -{ - lockdep_assert_held(&module_mutex); -} - static void module_assert_mutex_or_preempt(void) { #ifdef CONFIG_LOCKDEP @@ -414,19 +408,8 @@ extern const struct kernel_symbol __start___ksymtab[]; extern const struct kernel_symbol __stop___ksymtab[]; extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; -extern const struct kernel_symbol __start___ksymtab_gpl_future[]; -extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; extern const s32 __start___kcrctab[]; extern const s32 __start___kcrctab_gpl[]; -extern const s32 __start___kcrctab_gpl_future[]; -#ifdef CONFIG_UNUSED_SYMBOLS -extern const struct kernel_symbol __start___ksymtab_unused[]; -extern const struct kernel_symbol __stop___ksymtab_unused[]; -extern const struct kernel_symbol __start___ksymtab_unused_gpl[]; -extern const struct kernel_symbol __stop___ksymtab_unused_gpl[]; -extern const s32 __start___kcrctab_unused[]; -extern const s32 __start___kcrctab_unused_gpl[]; -#endif #ifndef CONFIG_MODVERSIONS #define symversion(base, idx) NULL @@ -434,87 +417,14 @@ extern const s32 __start___kcrctab_unused_gpl[]; #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL) #endif -static bool each_symbol_in_section(const struct symsearch *arr, - unsigned int arrsize, - struct module *owner, - bool (*fn)(const struct symsearch *syms, - struct module *owner, - void *data), - void *data) -{ - unsigned int j; - - for (j = 0; j < arrsize; j++) { - if (fn(&arr[j], owner, data)) - return true; - } - - return false; -} - -/* Returns true as soon as fn returns true, otherwise false. 
*/ -static bool each_symbol_section(bool (*fn)(const struct symsearch *arr, - struct module *owner, - void *data), - void *data) -{ - struct module *mod; - static const struct symsearch arr[] = { - { __start___ksymtab, __stop___ksymtab, __start___kcrctab, - NOT_GPL_ONLY, false }, - { __start___ksymtab_gpl, __stop___ksymtab_gpl, - __start___kcrctab_gpl, - GPL_ONLY, false }, - { __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future, - __start___kcrctab_gpl_future, - WILL_BE_GPL_ONLY, false }, -#ifdef CONFIG_UNUSED_SYMBOLS - { __start___ksymtab_unused, __stop___ksymtab_unused, - __start___kcrctab_unused, - NOT_GPL_ONLY, true }, - { __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl, - __start___kcrctab_unused_gpl, - GPL_ONLY, true }, -#endif - }; - - module_assert_mutex_or_preempt(); - - if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data)) - return true; - - list_for_each_entry_rcu(mod, &modules, list, - lockdep_is_held(&module_mutex)) { - struct symsearch arr[] = { - { mod->syms, mod->syms + mod->num_syms, mod->crcs, - NOT_GPL_ONLY, false }, - { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms, - mod->gpl_crcs, - GPL_ONLY, false }, - { mod->gpl_future_syms, - mod->gpl_future_syms + mod->num_gpl_future_syms, - mod->gpl_future_crcs, - WILL_BE_GPL_ONLY, false }, -#ifdef CONFIG_UNUSED_SYMBOLS - { mod->unused_syms, - mod->unused_syms + mod->num_unused_syms, - mod->unused_crcs, - NOT_GPL_ONLY, true }, - { mod->unused_gpl_syms, - mod->unused_gpl_syms + mod->num_unused_gpl_syms, - mod->unused_gpl_crcs, - GPL_ONLY, true }, -#endif - }; - - if (mod->state == MODULE_STATE_UNFORMED) - continue; - - if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data)) - return true; - } - return false; -} +struct symsearch { + const struct kernel_symbol *start, *stop; + const s32 *crcs; + enum mod_license { + NOT_GPL_ONLY, + GPL_ONLY, + } license; +}; struct find_symbol_arg { /* Input */ @@ -535,28 +445,8 @@ static bool check_exported_symbol(const struct symsearch *syms, { struct find_symbol_arg *fsa = data; - if (!fsa->gplok) { - if (syms->license == GPL_ONLY) - return false; - if (syms->license == WILL_BE_GPL_ONLY && fsa->warn) { - pr_warn("Symbol %s is being used by a non-GPL module, " - "which will not be allowed in the future\n", - fsa->name); - } - } - -#ifdef CONFIG_UNUSED_SYMBOLS - if (syms->unused && fsa->warn) { - pr_warn("Symbol %s is marked as UNUSED, however this module is " - "using it.\n", fsa->name); - pr_warn("This symbol will go away in the future.\n"); - pr_warn("Please evaluate if this is the right api to use and " - "if it really is, submit a report to the linux kernel " - "mailing list together with submitting your code for " - "inclusion.\n"); - } -#endif - + if (!fsa->gplok && syms->license == GPL_ONLY) + return false; fsa->owner = owner; fsa->crc = symversion(syms->crcs, symnum); fsa->sym = &syms->start[symnum]; @@ -619,31 +509,44 @@ static bool find_exported_symbol_in_section(const struct symsearch *syms, * Find an exported symbol and return it, along with, (optional) crc and * (optional) module which owns it. Needs preempt disabled or module_mutex. 
*/ -static const struct kernel_symbol *find_symbol(const char *name, - struct module **owner, - const s32 **crc, - enum mod_license *license, - bool gplok, - bool warn) -{ - struct find_symbol_arg fsa; - - fsa.name = name; - fsa.gplok = gplok; - fsa.warn = warn; - - if (each_symbol_section(find_exported_symbol_in_section, &fsa)) { - if (owner) - *owner = fsa.owner; - if (crc) - *crc = fsa.crc; - if (license) - *license = fsa.license; - return fsa.sym; +static bool find_symbol(struct find_symbol_arg *fsa) +{ + static const struct symsearch arr[] = { + { __start___ksymtab, __stop___ksymtab, __start___kcrctab, + NOT_GPL_ONLY }, + { __start___ksymtab_gpl, __stop___ksymtab_gpl, + __start___kcrctab_gpl, + GPL_ONLY }, + }; + struct module *mod; + unsigned int i; + + module_assert_mutex_or_preempt(); + + for (i = 0; i < ARRAY_SIZE(arr); i++) + if (find_exported_symbol_in_section(&arr[i], NULL, fsa)) + return true; + + list_for_each_entry_rcu(mod, &modules, list, + lockdep_is_held(&module_mutex)) { + struct symsearch arr[] = { + { mod->syms, mod->syms + mod->num_syms, mod->crcs, + NOT_GPL_ONLY }, + { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms, + mod->gpl_crcs, + GPL_ONLY }, + }; + + if (mod->state == MODULE_STATE_UNFORMED) + continue; + + for (i = 0; i < ARRAY_SIZE(arr); i++) + if (find_exported_symbol_in_section(&arr[i], mod, fsa)) + return true; } - pr_debug("Failed to find symbol %s\n", name); - return NULL; + pr_debug("Failed to find symbol %s\n", fsa->name); + return false; } /* @@ -669,10 +572,8 @@ static struct module *find_module_all(const char *name, size_t len, struct module *find_module(const char *name) { - module_assert_mutex(); return find_module_all(name, strlen(name), false); } -EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP @@ -1107,12 +1008,15 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) void __symbol_put(const char *symbol) { - struct module *owner; + struct find_symbol_arg fsa = { + .name = symbol, + .gplok = true, + }; preempt_disable(); - if (!find_symbol(symbol, &owner, NULL, NULL, true, false)) + if (!find_symbol(&fsa)) BUG(); - module_put(owner); + module_put(fsa.owner); preempt_enable(); } EXPORT_SYMBOL(__symbol_put); @@ -1381,19 +1285,22 @@ bad_version: static inline int check_modstruct_version(const struct load_info *info, struct module *mod) { - const s32 *crc; + struct find_symbol_arg fsa = { + .name = "module_layout", + .gplok = true, + }; /* * Since this should be found in kernel (which can't be removed), no * locking is necessary -- use preempt_disable() to placate lockdep. */ preempt_disable(); - if (!find_symbol("module_layout", NULL, &crc, NULL, true, false)) { + if (!find_symbol(&fsa)) { preempt_enable(); BUG(); } preempt_enable(); - return check_version(info, "module_layout", mod, crc); + return check_version(info, "module_layout", mod, fsa.crc); } /* First part is kernel version, which we ignore if module has crcs. 
*/ @@ -1487,10 +1394,11 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, const char *name, char ownername[]) { - struct module *owner; - const struct kernel_symbol *sym; - const s32 *crc; - enum mod_license license; + struct find_symbol_arg fsa = { + .name = name, + .gplok = !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), + .warn = true, + }; int err; /* @@ -1500,42 +1408,40 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, */ sched_annotate_sleep(); mutex_lock(&module_mutex); - sym = find_symbol(name, &owner, &crc, &license, - !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); - if (!sym) + if (!find_symbol(&fsa)) goto unlock; - if (license == GPL_ONLY) + if (fsa.license == GPL_ONLY) mod->using_gplonly_symbols = true; - if (!inherit_taint(mod, owner)) { - sym = NULL; + if (!inherit_taint(mod, fsa.owner)) { + fsa.sym = NULL; goto getname; } - if (!check_version(info, name, mod, crc)) { - sym = ERR_PTR(-EINVAL); + if (!check_version(info, name, mod, fsa.crc)) { + fsa.sym = ERR_PTR(-EINVAL); goto getname; } - err = verify_namespace_is_imported(info, sym, mod); + err = verify_namespace_is_imported(info, fsa.sym, mod); if (err) { - sym = ERR_PTR(err); + fsa.sym = ERR_PTR(err); goto getname; } - err = ref_module(mod, owner); + err = ref_module(mod, fsa.owner); if (err) { - sym = ERR_PTR(err); + fsa.sym = ERR_PTR(err); goto getname; } getname: /* We must make copy under the lock if we failed to get ref. */ - strncpy(ownername, module_name(owner), MODULE_NAME_LEN); + strncpy(ownername, module_name(fsa.owner), MODULE_NAME_LEN); unlock: mutex_unlock(&module_mutex); - return sym; + return fsa.sym; } static const struct kernel_symbol * @@ -2296,16 +2202,19 @@ static void free_module(struct module *mod) void *__symbol_get(const char *symbol) { - struct module *owner; - const struct kernel_symbol *sym; + struct find_symbol_arg fsa = { + .name = symbol, + .gplok = true, + .warn = true, + }; preempt_disable(); - sym = find_symbol(symbol, &owner, NULL, NULL, true, true); - if (sym && strong_try_module_get(owner)) - sym = NULL; + if (!find_symbol(&fsa) || strong_try_module_get(fsa.owner)) { + preempt_enable(); + return NULL; + } preempt_enable(); - - return sym ? 
(void *)kernel_symbol_value(sym) : NULL; + return (void *)kernel_symbol_value(fsa.sym); } EXPORT_SYMBOL_GPL(__symbol_get); @@ -2318,7 +2227,6 @@ EXPORT_SYMBOL_GPL(__symbol_get); static int verify_exported_symbols(struct module *mod) { unsigned int i; - struct module *owner; const struct kernel_symbol *s; struct { const struct kernel_symbol *sym; @@ -2326,21 +2234,19 @@ static int verify_exported_symbols(struct module *mod) } arr[] = { { mod->syms, mod->num_syms }, { mod->gpl_syms, mod->num_gpl_syms }, - { mod->gpl_future_syms, mod->num_gpl_future_syms }, -#ifdef CONFIG_UNUSED_SYMBOLS - { mod->unused_syms, mod->num_unused_syms }, - { mod->unused_gpl_syms, mod->num_unused_gpl_syms }, -#endif }; for (i = 0; i < ARRAY_SIZE(arr); i++) { for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { - if (find_symbol(kernel_symbol_name(s), &owner, NULL, - NULL, true, false)) { + struct find_symbol_arg fsa = { + .name = kernel_symbol_name(s), + .gplok = true, + }; + if (find_symbol(&fsa)) { pr_err("%s: exports duplicate symbol %s" " (owned by %s)\n", mod->name, kernel_symbol_name(s), - module_name(owner)); + module_name(fsa.owner)); return -ENOEXEC; } } @@ -2348,6 +2254,21 @@ static int verify_exported_symbols(struct module *mod) return 0; } +static bool ignore_undef_symbol(Elf_Half emachine, const char *name) +{ + /* + * On x86, PIC code and Clang non-PIC code may have call foo@PLT. GNU as + * before 2.37 produces an unreferenced _GLOBAL_OFFSET_TABLE_ on x86-64. + * i386 has a similar problem but may not deserve a fix. + * + * If we ever have to ignore many symbols, consider refactoring the code to + * only warn if referenced by a relocation. + */ + if (emachine == EM_386 || emachine == EM_X86_64) + return !strcmp(name, "_GLOBAL_OFFSET_TABLE_"); + return false; +} + /* Change all symbols so that st_value encodes the pointer directly. */ static int simplify_symbols(struct module *mod, const struct load_info *info) { @@ -2395,8 +2316,10 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) break; } - /* Ok if weak. */ - if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK) + /* Ok if weak or ignored. */ + if (!ksym && + (ELF_ST_BIND(sym[i].st_info) == STB_WEAK || + ignore_undef_symbol(info->hdr->e_machine, name))) break; ret = PTR_ERR(ksym) ?: -ENOENT; @@ -2964,7 +2887,7 @@ static int module_sig_check(struct load_info *info, int flags) } if (is_module_sig_enforced()) { - pr_notice("%s: loading of %s is rejected\n", info->name, reason); + pr_notice("Loading of %s is rejected\n", reason); return -EKEYREJECTED; } @@ -2977,9 +2900,33 @@ static int module_sig_check(struct load_info *info, int flags) } #endif /* !CONFIG_MODULE_SIG */ -/* Sanity checks against invalid binaries, wrong arch, weird elf version. */ -static int elf_header_check(struct load_info *info) +static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr) +{ + unsigned long secend; + + /* + * Check for both overflow and offset/size being + * too large. + */ + secend = shdr->sh_offset + shdr->sh_size; + if (secend < shdr->sh_offset || secend > info->len) + return -ENOEXEC; + + return 0; +} + +/* + * Sanity checks against invalid binaries, wrong arch, weird elf version. + * + * Also do basic validity checks against section offsets and sizes, the + * section name string table, and the indices used for it (sh_name). 
+ */ +static int elf_validity_check(struct load_info *info) { + unsigned int i; + Elf_Shdr *shdr, *strhdr; + int err; + if (info->len < sizeof(*(info->hdr))) return -ENOEXEC; @@ -2989,11 +2936,78 @@ static int elf_header_check(struct load_info *info) || info->hdr->e_shentsize != sizeof(Elf_Shdr)) return -ENOEXEC; + /* + * e_shnum is 16 bits, and sizeof(Elf_Shdr) is + * known and small. So e_shnum * sizeof(Elf_Shdr) + * will not overflow unsigned long on any platform. + */ if (info->hdr->e_shoff >= info->len || (info->hdr->e_shnum * sizeof(Elf_Shdr) > info->len - info->hdr->e_shoff)) return -ENOEXEC; + info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; + + /* + * Verify if the section name table index is valid. + */ + if (info->hdr->e_shstrndx == SHN_UNDEF + || info->hdr->e_shstrndx >= info->hdr->e_shnum) + return -ENOEXEC; + + strhdr = &info->sechdrs[info->hdr->e_shstrndx]; + err = validate_section_offset(info, strhdr); + if (err < 0) + return err; + + /* + * The section name table must be NUL-terminated, as required + * by the spec. This makes strcmp and pr_* calls that access + * strings in the section safe. + */ + info->secstrings = (void *)info->hdr + strhdr->sh_offset; + if (info->secstrings[strhdr->sh_size - 1] != '\0') + return -ENOEXEC; + + /* + * The code assumes that section 0 has a length of zero and + * an addr of zero, so check for it. + */ + if (info->sechdrs[0].sh_type != SHT_NULL + || info->sechdrs[0].sh_size != 0 + || info->sechdrs[0].sh_addr != 0) + return -ENOEXEC; + + for (i = 1; i < info->hdr->e_shnum; i++) { + shdr = &info->sechdrs[i]; + switch (shdr->sh_type) { + case SHT_NULL: + case SHT_NOBITS: + continue; + case SHT_SYMTAB: + if (shdr->sh_link == SHN_UNDEF + || shdr->sh_link >= info->hdr->e_shnum) + return -ENOEXEC; + fallthrough; + default: + err = validate_section_offset(info, shdr); + if (err < 0) { + pr_err("Invalid ELF section in module (section %u type %u)\n", + i, shdr->sh_type); + return err; + } + + if (shdr->sh_flags & SHF_ALLOC) { + if (shdr->sh_name >= strhdr->sh_size) { + pr_err("Invalid ELF section name in module (section %u type %u)\n", + i, shdr->sh_type); + return -ENOEXEC; + } + } + break; + } + } + return 0; } @@ -3095,11 +3109,6 @@ static int rewrite_section_headers(struct load_info *info, int flags) for (i = 1; i < info->hdr->e_shnum; i++) { Elf_Shdr *shdr = &info->sechdrs[i]; - if (shdr->sh_type != SHT_NOBITS - && info->len < shdr->sh_offset + shdr->sh_size) { - pr_err("Module len %lu truncated\n", info->len); - return -ENOEXEC; - } /* * Mark all sections sh_addr with their address in the @@ -3133,11 +3142,6 @@ static int setup_load_info(struct load_info *info, int flags) { unsigned int i; - /* Set up the convenience variables */ - info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; - info->secstrings = (void *)info->hdr - + info->sechdrs[info->hdr->e_shstrndx].sh_offset; - /* Try to find a name early so we can log errors with a module name */ info->index.info = find_sec(info, ".modinfo"); if (info->index.info) @@ -3241,22 +3245,7 @@ static int find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->gpl_syms), &mod->num_gpl_syms); mod->gpl_crcs = section_addr(info, "__kcrctab_gpl"); - mod->gpl_future_syms = section_objs(info, - "__ksymtab_gpl_future", - sizeof(*mod->gpl_future_syms), - &mod->num_gpl_future_syms); - mod->gpl_future_crcs = section_addr(info, "__kcrctab_gpl_future"); - -#ifdef CONFIG_UNUSED_SYMBOLS - mod->unused_syms = section_objs(info, "__ksymtab_unused", - sizeof(*mod->unused_syms), - 
&mod->num_unused_syms); - mod->unused_crcs = section_addr(info, "__kcrctab_unused"); - mod->unused_gpl_syms = section_objs(info, "__ksymtab_unused_gpl", - sizeof(*mod->unused_gpl_syms), - &mod->num_unused_gpl_syms); - mod->unused_gpl_crcs = section_addr(info, "__kcrctab_unused_gpl"); -#endif + #ifdef CONFIG_CONSTRUCTORS mod->ctors = section_objs(info, ".ctors", sizeof(*mod->ctors), &mod->num_ctors); @@ -3437,14 +3426,8 @@ static int check_module_license_and_versions(struct module *mod) pr_warn("%s: module license taints kernel.\n", mod->name); #ifdef CONFIG_MODVERSIONS - if ((mod->num_syms && !mod->crcs) - || (mod->num_gpl_syms && !mod->gpl_crcs) - || (mod->num_gpl_future_syms && !mod->gpl_future_crcs) -#ifdef CONFIG_UNUSED_SYMBOLS - || (mod->num_unused_syms && !mod->unused_crcs) - || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs) -#endif - ) { + if ((mod->num_syms && !mod->crcs) || + (mod->num_gpl_syms && !mod->gpl_crcs)) { return try_to_force_load(mod, "no versions for exported symbols"); } @@ -3894,26 +3877,50 @@ static int load_module(struct load_info *info, const char __user *uargs, long err = 0; char *after_dashes; - err = elf_header_check(info); + /* + * Do the signature check (if any) first. All that + * the signature check needs is info->len, it does + * not need any of the section info. That can be + * set up later. This will minimize the chances + * of a corrupt module causing problems before + * we even get to the signature check. + * + * The check will also adjust info->len by stripping + * off the sig length at the end of the module, making + * checks against info->len more correct. + */ + err = module_sig_check(info, flags); + if (err) + goto free_copy; + + /* + * Do basic sanity checks against the ELF header and + * sections. + */ + err = elf_validity_check(info); if (err) { - pr_err("Module has invalid ELF header\n"); + pr_err("Module has invalid ELF structures\n"); goto free_copy; } + /* + * Everything checks out, so set up the section info + * in the info structure. + */ err = setup_load_info(info, flags); if (err) goto free_copy; + /* + * Now that we know we have the correct module name, check + * if it's blacklisted. 
+ */ if (blacklisted(info->name)) { err = -EPERM; pr_err("Module %s is blacklisted\n", info->name); goto free_copy; } - err = module_sig_check(info, flags); - if (err) - goto free_copy; - err = rewrite_section_headers(info, flags); if (err) goto free_copy; @@ -4374,16 +4381,16 @@ unsigned long module_kallsyms_lookup_name(const char *name) return ret; } +#ifdef CONFIG_LIVEPATCH int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data) { struct module *mod; unsigned int i; - int ret; - - module_assert_mutex(); + int ret = 0; + mutex_lock(&module_mutex); list_for_each_entry(mod, &modules, list) { /* We hold module_mutex: no need for rcu_dereference_sched */ struct mod_kallsyms *kallsyms = mod->kallsyms; @@ -4399,11 +4406,13 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, ret = fn(data, kallsyms_symbol_name(kallsyms, i), mod, kallsyms_symbol_value(sym)); if (ret != 0) - return ret; + break; } } - return 0; + mutex_unlock(&module_mutex); + return ret; } +#endif /* CONFIG_LIVEPATCH */ #endif /* CONFIG_KALLSYMS */ /* Maximum number of characters written by module_flags() */ diff --git a/kernel/module_signature.c b/kernel/module_signature.c index 4224a1086b7d..00132d12487c 100644 --- a/kernel/module_signature.c +++ b/kernel/module_signature.c @@ -25,7 +25,7 @@ int mod_check_sig(const struct module_signature *ms, size_t file_len, return -EBADMSG; if (ms->id_type != PKEY_ID_PKCS7) { - pr_err("%s: Module is not signed with expected PKCS#7 message\n", + pr_err("%s: not signed with expected PKCS#7 message\n", name); return -ENOPKG; } diff --git a/kernel/module_signing.c b/kernel/module_signing.c index 9d9fc678c91d..8723ae70ea1f 100644 --- a/kernel/module_signing.c +++ b/kernel/module_signing.c @@ -30,7 +30,7 @@ int mod_verify_sig(const void *mod, struct load_info *info) memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms)); - ret = mod_check_sig(&ms, modlen, info->name); + ret = mod_check_sig(&ms, modlen, "module"); if (ret) return ret; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5a95c688621f..575a34b88936 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -735,9 +735,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, logbuf_lock_irq(); } - if (user->seq < prb_first_valid_seq(prb)) { + if (r->info->seq != user->seq) { /* our last seen message is gone, return error and reset */ - user->seq = prb_first_valid_seq(prb); + user->seq = r->info->seq; ret = -EPIPE; logbuf_unlock_irq(); goto out; @@ -812,6 +812,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) static __poll_t devkmsg_poll(struct file *file, poll_table *wait) { struct devkmsg_user *user = file->private_data; + struct printk_info info; __poll_t ret = 0; if (!user) @@ -820,9 +821,9 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); logbuf_lock_irq(); - if (prb_read_valid(prb, user->seq, NULL)) { + if (prb_read_valid_info(prb, user->seq, &info, NULL)) { /* return error when data has vanished underneath us */ - if (user->seq < prb_first_valid_seq(prb)) + if (info.seq != user->seq) ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; else ret = EPOLLIN|EPOLLRDNORM; @@ -1559,6 +1560,7 @@ static void syslog_clear(void) int do_syslog(int type, char __user *buf, int len, int source) { + struct printk_info info; bool clear = false; static int saved_console_loglevel = LOGLEVEL_DEFAULT; int error; @@ -1629,9 +1631,14 @@ int do_syslog(int type, char 
__user *buf, int len, int source) /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: logbuf_lock_irq(); - if (syslog_seq < prb_first_valid_seq(prb)) { + if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { + /* No unread messages. */ + logbuf_unlock_irq(); + return 0; + } + if (info.seq != syslog_seq) { /* messages are gone, move to first one */ - syslog_seq = prb_first_valid_seq(prb); + syslog_seq = info.seq; syslog_partial = 0; } if (source == SYSLOG_FROM_PROC) { @@ -1643,7 +1650,6 @@ int do_syslog(int type, char __user *buf, int len, int source) error = prb_next_seq(prb) - syslog_seq; } else { bool time = syslog_partial ? syslog_time : printk_time; - struct printk_info info; unsigned int line_count; u64 seq; @@ -3429,9 +3435,11 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, goto out; logbuf_lock_irqsave(flags); - if (dumper->cur_seq < prb_first_valid_seq(prb)) { - /* messages are gone, move to first available one */ - dumper->cur_seq = prb_first_valid_seq(prb); + if (prb_read_valid_info(prb, dumper->cur_seq, &info, NULL)) { + if (info.seq != dumper->cur_seq) { + /* messages are gone, move to first available one */ + dumper->cur_seq = info.seq; + } } /* last entry */ diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index 5dc9d022db07..73cc80e01cef 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -287,7 +287,7 @@ _DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0]) /* Writer Interface */ /** - * prb_rec_init_wd() - Initialize a buffer for writing records. + * prb_rec_init_wr() - Initialize a buffer for writing records. * * @r: The record to initialize. * @text_buf_size: The needed text buffer size. diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index a0e6f746de6c..2e9e3ed7d63e 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -45,6 +45,8 @@ struct printk_safe_seq_buf { static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); static DEFINE_PER_CPU(int, printk_context); +static DEFINE_RAW_SPINLOCK(safe_read_lock); + #ifdef CONFIG_PRINTK_NMI static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); #endif @@ -180,8 +182,6 @@ static void report_message_lost(struct printk_safe_seq_buf *s) */ static void __printk_safe_flush(struct irq_work *work) { - static raw_spinlock_t read_lock = - __RAW_SPIN_LOCK_INITIALIZER(read_lock); struct printk_safe_seq_buf *s = container_of(work, struct printk_safe_seq_buf, work); unsigned long flags; @@ -195,7 +195,7 @@ static void __printk_safe_flush(struct irq_work *work) * different CPUs. This is especially important when printing * a backtrace. 
*/ - raw_spin_lock_irqsave(&read_lock, flags); + raw_spin_lock_irqsave(&safe_read_lock, flags); i = 0; more: @@ -232,7 +232,7 @@ more: out: report_message_lost(s); - raw_spin_unlock_irqrestore(&read_lock, flags); + raw_spin_unlock_irqrestore(&safe_read_lock, flags); } /** @@ -278,6 +278,14 @@ void printk_safe_flush_on_panic(void) raw_spin_lock_init(&logbuf_lock); } + if (raw_spin_is_locked(&safe_read_lock)) { + if (num_online_cpus() > 1) + return; + + debug_locks_off(); + raw_spin_lock_init(&safe_read_lock); + } + printk_safe_flush(); } diff --git a/kernel/resource.c b/kernel/resource.c index 833394f9c608..627e61b0c124 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -18,12 +18,15 @@ #include <linux/spinlock.h> #include <linux/fs.h> #include <linux/proc_fs.h> +#include <linux/pseudo_fs.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/device.h> #include <linux/pfn.h> #include <linux/mm.h> +#include <linux/mount.h> #include <linux/resource_ext.h> +#include <uapi/linux/magic.h> #include <asm/io.h> @@ -1119,6 +1122,55 @@ resource_size_t resource_alignment(struct resource *res) static DECLARE_WAIT_QUEUE_HEAD(muxed_resource_wait); +static struct inode *iomem_inode; + +#ifdef CONFIG_IO_STRICT_DEVMEM +static void revoke_iomem(struct resource *res) +{ + /* pairs with smp_store_release() in iomem_init_inode() */ + struct inode *inode = smp_load_acquire(&iomem_inode); + + /* + * Check that the initialization has completed. Losing the race + * is ok because it means drivers are claiming resources before + * the fs_initcall level of init and prevent iomem_get_mapping users + * from establishing mappings. + */ + if (!inode) + return; + + /* + * The expectation is that the driver has successfully marked + * the resource busy by this point, so devmem_is_allowed() + * should start returning false, however for performance this + * does not iterate the entire resource range. + */ + if (devmem_is_allowed(PHYS_PFN(res->start)) && + devmem_is_allowed(PHYS_PFN(res->end))) { + /* + * *cringe* iomem=relaxed says "go ahead, what's the + * worst that can happen?" + */ + return; + } + + unmap_mapping_range(inode->i_mapping, res->start, resource_size(res), 1); +} +#else +static void revoke_iomem(struct resource *res) {} +#endif + +struct address_space *iomem_get_mapping(void) +{ + /* + * This function is only called from file open paths, hence guaranteed + * that fs_initcalls have completed and no need to check for NULL. But + * since revoke_iomem can be called before the initcall we still need + * the barrier to appease checkers. + */ + return smp_load_acquire(&iomem_inode)->i_mapping; +} + /** * __request_region - create a new busy resource region * @parent: parent resource descriptor @@ -1186,7 +1238,7 @@ struct resource * __request_region(struct resource *parent, write_unlock(&resource_lock); if (res && orig_parent == &iomem_resource) - revoke_devmem(res); + revoke_iomem(res); return res; } @@ -1786,4 +1838,48 @@ static int __init strict_iomem(char *str) return 1; } +static int iomem_fs_init_fs_context(struct fs_context *fc) +{ + return init_pseudo(fc, DEVMEM_MAGIC) ? 
0 : -ENOMEM; +} + +static struct file_system_type iomem_fs_type = { + .name = "iomem", + .owner = THIS_MODULE, + .init_fs_context = iomem_fs_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static int __init iomem_init_inode(void) +{ + static struct vfsmount *iomem_vfs_mount; + static int iomem_fs_cnt; + struct inode *inode; + int rc; + + rc = simple_pin_fs(&iomem_fs_type, &iomem_vfs_mount, &iomem_fs_cnt); + if (rc < 0) { + pr_err("Cannot mount iomem pseudo filesystem: %d\n", rc); + return rc; + } + + inode = alloc_anon_inode(iomem_vfs_mount->mnt_sb); + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); + pr_err("Cannot allocate inode for iomem: %d\n", rc); + simple_release_fs(&iomem_vfs_mount, &iomem_fs_cnt); + return rc; + } + + /* + * Publish iomem revocation inode initialized. + * Pairs with smp_load_acquire() in revoke_iomem(). + */ + smp_store_release(&iomem_inode, inode); + + return 0; +} + +fs_initcall(iomem_init_inode); + __setup("iomem=", strict_iomem); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7f5ffc878411..ca2bb629595f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7028,6 +7028,46 @@ int __cond_resched_lock(spinlock_t *lock) } EXPORT_SYMBOL(__cond_resched_lock); +int __cond_resched_rwlock_read(rwlock_t *lock) +{ + int resched = should_resched(PREEMPT_LOCK_OFFSET); + int ret = 0; + + lockdep_assert_held_read(lock); + + if (rwlock_needbreak(lock) || resched) { + read_unlock(lock); + if (resched) + preempt_schedule_common(); + else + cpu_relax(); + ret = 1; + read_lock(lock); + } + return ret; +} +EXPORT_SYMBOL(__cond_resched_rwlock_read); + +int __cond_resched_rwlock_write(rwlock_t *lock) +{ + int resched = should_resched(PREEMPT_LOCK_OFFSET); + int ret = 0; + + lockdep_assert_held_write(lock); + + if (rwlock_needbreak(lock) || resched) { + write_unlock(lock); + if (resched) + preempt_schedule_common(); + else + cpu_relax(); + ret = 1; + write_lock(lock); + } + return ret; +} +EXPORT_SYMBOL(__cond_resched_rwlock_write); + /** * yield - yield the current processor to other threads. * diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 41e498b0008a..50cbad89f7fa 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -26,7 +26,7 @@ struct sugov_policy { struct sugov_tunables *tunables; struct list_head tunables_hook; - raw_spinlock_t update_lock; /* For shared policies */ + raw_spinlock_t update_lock; u64 last_freq_update_time; s64 freq_update_delay_ns; unsigned int next_freq; @@ -320,23 +320,21 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } * Make sugov_should_update_freq() ignore the rate limit when DL * has increased the utilization. 
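revoke_iomem() in the resource.c hunks above is written to tolerate running before the fs_initcall that creates the inode: it does an acquire load of the pointer and bails out if publication has not happened yet, while iomem_init_inode() publishes the fully constructed inode with a release store. The same one-time publication pattern in portable C11 atomics (the names and the payload struct are made up for this sketch):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct mapping { int dummy; };

/* Shared slot, written once by the init path. */
static _Atomic(struct mapping *) published_mapping;

/* Init path: fully construct the object, then publish it. */
static void publish_mapping(void)
{
	struct mapping *m = calloc(1, sizeof(*m));

	if (!m)
		return;
	/* Pairs with the acquire load in try_revoke(). */
	atomic_store_explicit(&published_mapping, m, memory_order_release);
}

/* Revoke path: may run before the init path has finished. */
static void try_revoke(void)
{
	struct mapping *m =
		atomic_load_explicit(&published_mapping, memory_order_acquire);

	if (!m) {
		/* Losing the race is fine: nothing was mapped yet. */
		printf("not initialized yet, nothing to revoke\n");
		return;
	}
	printf("revoking via %p\n", (void *)m);
}

int main(void)
{
	try_revoke();		/* before publication: harmless no-op */
	publish_mapping();
	try_revoke();		/* after publication: sees a complete object */
	return 0;
}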
*/ -static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) +static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) { if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) - sg_policy->limits_changed = true; + sg_cpu->sg_policy->limits_changed = true; } static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; - sugov_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; - ignore_dl_rate_limit(sg_cpu, sg_policy); + ignore_dl_rate_limit(sg_cpu); - if (!sugov_should_update_freq(sg_policy, time)) + if (!sugov_should_update_freq(sg_cpu->sg_policy, time)) return false; sugov_get_util(sg_cpu); @@ -451,7 +449,7 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) sugov_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; - ignore_dl_rate_limit(sg_cpu, sg_policy); + ignore_dl_rate_limit(sg_cpu); if (sugov_should_update_freq(sg_policy, time)) { next_f = sugov_next_freq_shared(sg_cpu, time); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 952dc1c90229..1d60fc2c9987 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1164,7 +1164,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, * Make sure that any changes to mode from another thread have * been seen after SYSCALL_WORK_SECCOMP was seen. */ - rmb(); + smp_rmb(); if (!sd) { populate_seccomp_data(&sd_local); @@ -1284,6 +1284,8 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, const bool recheck_after_trace) { BUG(); + + return -1; } #endif diff --git a/kernel/softirq.c b/kernel/softirq.c index 9d71046ea247..9908ec4a9bfe 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -26,6 +26,8 @@ #include <linux/tick.h> #include <linux/irq.h> +#include <asm/softirq_stack.h> + #define CREATE_TRACE_POINTS #include <trace/events/irq.h> diff --git a/kernel/sys.c b/kernel/sys.c index 6928d23c46ea..b09fe21e88ff 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1242,7 +1242,7 @@ static int override_release(char __user *release, size_t len) break; rest++; } - v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 60; + v = LINUX_VERSION_PATCHLEVEL + 60; copy = clamp_t(size_t, len, 1, sizeof(buf)); copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); ret = copy_to_user(release, buf, copy + 1); @@ -1847,7 +1847,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path)) goto exit; - err = inode_permission(inode, MAY_EXEC); + err = file_permission(exe.file, MAY_EXEC); if (err) goto exit; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index c1a62ae7e812..9c266b93cbc0 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -60,6 +60,11 @@ config HAVE_NOP_MCOUNT help Arch supports the gcc options -pg with -mrecord-mcount and -nop-mcount +config HAVE_OBJTOOL_MCOUNT + bool + help + Arch supports objtool --mcount + config HAVE_C_RECORDMCOUNT bool help @@ -545,7 +550,7 @@ config KPROBE_EVENTS_ON_NOTRACE using kprobe events. If kprobes can use ftrace instead of breakpoint, ftrace related - functions are protected from kprobe-events to prevent an infinit + functions are protected from kprobe-events to prevent an infinite recursion or any unexpected execution path which leads to a kernel crash. 
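The sys.c hunk swaps the open-coded (LINUX_VERSION_CODE >> 8) & 0xff expression for the LINUX_VERSION_PATCHLEVEL macro; either way, UNAME26 reports a kernel with patchlevel x as a fictitious 2.6.(x+60) release. A standalone sketch of that string rewrite, with the patchlevel hard-coded purely for illustration:

#include <ctype.h>
#include <stdio.h>

/* Stand-in for LINUX_VERSION_PATCHLEVEL, e.g. 12 on a v5.12 kernel. */
#define FAKE_PATCHLEVEL 12

/* Build the UNAME26-style string "2.6.<patchlevel + 60><suffix>". */
static void fake_release(char *buf, size_t len, const char *real_release)
{
	const char *rest = real_release;

	/* Skip the numeric "major.minor.sublevel" part, keep the suffix. */
	while (*rest && (isdigit((unsigned char)*rest) || *rest == '.'))
		rest++;

	snprintf(buf, len, "2.6.%d%s", FAKE_PATCHLEVEL + 60, rest);
}

int main(void)
{
	char buf[65];

	fake_release(buf, sizeof(buf), "5.12.0-rc1");
	printf("%s\n", buf);		/* prints "2.6.72-rc1" */
	return 0;
}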
@@ -602,6 +607,30 @@ config FTRACE_MCOUNT_RECORD depends on DYNAMIC_FTRACE depends on HAVE_FTRACE_MCOUNT_RECORD +config FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY + bool + depends on FTRACE_MCOUNT_RECORD + +config FTRACE_MCOUNT_USE_CC + def_bool y + depends on $(cc-option,-mrecord-mcount) + depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY + depends on FTRACE_MCOUNT_RECORD + +config FTRACE_MCOUNT_USE_OBJTOOL + def_bool y + depends on HAVE_OBJTOOL_MCOUNT + depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY + depends on !FTRACE_MCOUNT_USE_CC + depends on FTRACE_MCOUNT_RECORD + +config FTRACE_MCOUNT_USE_RECORDMCOUNT + def_bool y + depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY + depends on !FTRACE_MCOUNT_USE_CC + depends on !FTRACE_MCOUNT_USE_OBJTOOL + depends on FTRACE_MCOUNT_RECORD + config TRACING_MAP bool depends on ARCH_HAVE_NMI_SAFE_CMPXCHG @@ -886,6 +915,10 @@ config PREEMPTIRQ_DELAY_TEST irq-disabled critical sections for 500us: modprobe preemptirq_delay_test test_mode=irq delay=500 burst_size=3 + What's more, if you want to attach the test on the cpu which the latency + tracer is running on, specify cpu_affinity=cpu_num at the end of the + command. + If unsure, say N config SYNTH_EVENT_GEN_TEST diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 9e9ee4945043..c286c13bd31a 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -72,17 +72,17 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, struct blk_io_trace *t; struct ring_buffer_event *event = NULL; struct trace_buffer *buffer = NULL; - int pc = 0; + unsigned int trace_ctx = 0; int cpu = smp_processor_id(); bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; if (blk_tracer) { buffer = blk_tr->array_buffer.buffer; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + len + cgid_len, - 0, pc); + trace_ctx); if (!event) return; t = ring_buffer_event_data(event); @@ -107,7 +107,7 @@ record_it: memcpy((void *) t + sizeof(*t) + cgid_len, data, len); if (blk_tracer) - trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); } } @@ -222,8 +222,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, struct blk_io_trace *t; unsigned long flags = 0; unsigned long *sequence; + unsigned int trace_ctx = 0; pid_t pid; - int cpu, pc = 0; + int cpu; bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? 
sizeof(cgid) : 0; @@ -252,10 +253,10 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, tracing_record_cmdline(current); buffer = blk_tr->array_buffer.buffer; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + pdu_len + cgid_len, - 0, pc); + trace_ctx); if (!event) return; t = ring_buffer_event_data(event); @@ -301,7 +302,7 @@ record_it: memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len); if (blk_tracer) { - trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); return; } } diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index 312d1a0ca3b6..8c4ffd076162 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -21,13 +21,16 @@ static ulong delay = 100; static char test_mode[12] = "irq"; static uint burst_size = 1; +static int cpu_affinity = -1; module_param_named(delay, delay, ulong, 0444); module_param_string(test_mode, test_mode, 12, 0444); module_param_named(burst_size, burst_size, uint, 0444); +module_param_named(cpu_affinity, cpu_affinity, int, 0444); MODULE_PARM_DESC(delay, "Period in microseconds (100 us default)"); MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt, irq, or alternate (default irq)"); MODULE_PARM_DESC(burst_size, "The size of a burst (default 1)"); +MODULE_PARM_DESC(cpu_affinity, "Cpu num test is running on"); static struct completion done; @@ -36,7 +39,9 @@ static struct completion done; static void busy_wait(ulong time) { u64 start, end; + start = trace_clock_local(); + do { end = trace_clock_local(); if (kthread_should_stop()) @@ -47,6 +52,7 @@ static void busy_wait(ulong time) static __always_inline void irqoff_test(void) { unsigned long flags; + local_irq_save(flags); busy_wait(delay); local_irq_restore(flags); @@ -113,6 +119,14 @@ static int preemptirq_delay_run(void *data) { int i; int s = MIN(burst_size, NR_TEST_FUNCS); + struct cpumask cpu_mask; + + if (cpu_affinity > -1) { + cpumask_clear(&cpu_mask); + cpumask_set_cpu(cpu_affinity, &cpu_mask); + if (set_cpus_allowed_ptr(current, &cpu_mask)) + pr_err("cpu_affinity:%d, failed\n", cpu_affinity); + } for (i = 0; i < s; i++) (testfuncs[i])(i); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ec08f948dd80..b9dad3500041 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1112,8 +1112,7 @@ static struct list_head *rb_list_head(struct list_head *list) * its flags will be non zero. */ static inline int -rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page *page, struct list_head *list) +rb_is_head_page(struct buffer_page *page, struct list_head *list) { unsigned long val; @@ -1142,8 +1141,7 @@ static bool rb_is_reader_page(struct buffer_page *page) /* * rb_set_list_to_head - set a list_head to be pointing to head. */ -static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer, - struct list_head *list) +static void rb_set_list_to_head(struct list_head *list) { unsigned long *ptr; @@ -1166,7 +1164,7 @@ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) /* * Set the previous list pointer to have the HEAD flag. 
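The preemptirq_delay_test change above lets the test thread be pinned to the CPU the latency tracer is running on by building a single-CPU cpumask and calling set_cpus_allowed_ptr(). A userspace analogue of the same idea using sched_setaffinity() (Linux-specific; the CPU number comes from argv here just for the sketch):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	int cpu = argc > 1 ? atoi(argv[1]) : -1;
	cpu_set_t mask;

	if (cpu < 0) {
		fprintf(stderr, "usage: %s <cpu>\n", argv[0]);
		return 1;
	}

	/* Build a mask containing only the requested CPU... */
	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	/* ...and pin the calling thread to it, like set_cpus_allowed_ptr(). */
	if (sched_setaffinity(0, sizeof(mask), &mask)) {
		perror("sched_setaffinity");
		return 1;
	}

	printf("pinned to CPU %d, now running on CPU %d\n",
	       cpu, sched_getcpu());
	return 0;
}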
*/ - rb_set_list_to_head(cpu_buffer, head->list.prev); + rb_set_list_to_head(head->list.prev); } static void rb_list_head_clear(struct list_head *list) @@ -1241,8 +1239,7 @@ static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, old_flag, RB_PAGE_NORMAL); } -static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page **bpage) +static inline void rb_inc_page(struct buffer_page **bpage) { struct list_head *p = rb_list_head((*bpage)->list.next); @@ -1274,11 +1271,11 @@ rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) */ for (i = 0; i < 3; i++) { do { - if (rb_is_head_page(cpu_buffer, page, page->list.prev)) { + if (rb_is_head_page(page, page->list.prev)) { cpu_buffer->head_page = page; return page; } - rb_inc_page(cpu_buffer, &page); + rb_inc_page(&page); } while (page != head); } @@ -1824,7 +1821,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) cond_resched(); to_remove_page = tmp_iter_page; - rb_inc_page(cpu_buffer, &tmp_iter_page); + rb_inc_page(&tmp_iter_page); /* update the counters */ page_entries = rb_page_entries(to_remove_page); @@ -2062,10 +2059,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, put_online_cpus(); } else { - /* Make sure this CPU has been initialized */ - if (!cpumask_test_cpu(cpu_id, buffer->cpumask)) - goto out; - cpu_buffer = buffer->buffers[cpu_id]; if (nr_pages == cpu_buffer->nr_pages) @@ -2271,7 +2264,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) if (iter->head_page == cpu_buffer->reader_page) iter->head_page = rb_set_head_page(cpu_buffer); else - rb_inc_page(cpu_buffer, &iter->head_page); + rb_inc_page(&iter->head_page); iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp; iter->head = 0; @@ -2374,7 +2367,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * want the outer most commit to reset it. */ new_head = next_page; - rb_inc_page(cpu_buffer, &new_head); + rb_inc_page(&new_head); ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, RB_PAGE_NORMAL); @@ -2526,7 +2519,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, next_page = tail_page; - rb_inc_page(cpu_buffer, &next_page); + rb_inc_page(&next_page); /* * If for some reason, we had an interrupt storm that made @@ -2552,7 +2545,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, * the buffer, unless the commit page is still on the * reader page. */ - if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) { + if (rb_is_head_page(next_page, &tail_page->list)) { /* * If the commit is not on the reader page, then @@ -2583,7 +2576,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, * have filled up the buffer with events * from interrupts and such, and wrapped. * - * Note, if the tail page is also the on the + * Note, if the tail page is also on the * reader_page, we let it move out. */ if (unlikely((cpu_buffer->commit_page != @@ -2879,7 +2872,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) return; local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); - rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); + rb_inc_page(&cpu_buffer->commit_page); /* add barrier to keep gcc from optimizing too much */ barrier(); } @@ -3638,14 +3631,14 @@ rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, * Because the commit page may be on the reader page we * start with the next page and check the end loop there. 
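Dropping the cpu_buffer argument from rb_is_head_page(), rb_set_list_to_head() and rb_inc_page() works because the HEAD/UPDATE state those helpers care about is not stored in the per-CPU buffer at all; it lives in the low bits of the pointer-aligned list pointers. A minimal sketch of that tagged-pointer trick (names invented, and the kernel's RB_PAGE_* encoding is richer than this):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TAG_HEAD   0x1UL
#define TAG_UPDATE 0x2UL
#define TAG_MASK   0x3UL

struct node { struct node *next; } __attribute__((aligned(4)));

/* Strip the tag bits to recover the real pointer (cf. rb_list_head()). */
static struct node *untag(struct node *p)
{
	return (struct node *)((uintptr_t)p & ~TAG_MASK);
}

static unsigned long tag_of(struct node *p)
{
	return (uintptr_t)p & TAG_MASK;
}

/* Mark the link as pointing at the head page (cf. rb_set_list_to_head()). */
static void set_head(struct node **link)
{
	*link = (struct node *)((uintptr_t)untag(*link) | TAG_HEAD);
}

int main(void)
{
	struct node a, b;

	a.next = &b;
	set_head(&a.next);

	assert(untag(a.next) == &b);	/* the pointer itself survives */
	printf("tag=%lu still points to b: %d\n",
	       tag_of(a.next), untag(a.next) == &b);
	return 0;
}

Because the list links are at least pointer-aligned, the two low bits are always free to carry the page state, so no extra storage per page is needed.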
*/ - rb_inc_page(cpu_buffer, &bpage); + rb_inc_page(&bpage); start = bpage; do { if (bpage->page == (void *)addr) { local_dec(&bpage->entries); return; } - rb_inc_page(cpu_buffer, &bpage); + rb_inc_page(&bpage); } while (bpage != start); /* commit not part of this buffer?? */ @@ -4367,7 +4360,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->pages = reader->list.prev; /* The reader page will be pointing to the new head */ - rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); + rb_set_list_to_head(&cpu_buffer->reader_page->list); /* * We want to make sure we read the overruns after we set up our @@ -4406,7 +4399,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * Now make the new head point back to the reader page. */ rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; - rb_inc_page(cpu_buffer, &cpu_buffer->head_page); + rb_inc_page(&cpu_buffer->head_page); local_inc(&cpu_buffer->pages_read); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b5815a022ecc..e295c413580e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -176,7 +176,7 @@ static union trace_eval_map_item *trace_eval_maps; int tracing_set_tracer(struct trace_array *tr, const char *buf); static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, int pc); + unsigned int trace_ctx); #define MAX_TRACER_SIZE 100 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; @@ -408,7 +408,8 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export); TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \ TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \ TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \ - TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS) + TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \ + TRACE_ITER_HASH_PTR) /* trace_options that are only supported by global_trace */ #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \ @@ -454,6 +455,7 @@ static void __trace_array_put(struct trace_array *this_tr) /** * trace_array_put - Decrement the reference counter for this trace array. + * @this_tr : pointer to the trace array * * NOTE: Use this when we no longer need the trace array returned by * trace_array_get_by_name(). This ensures the trace array can be later @@ -530,6 +532,7 @@ trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) /** * trace_ignore_this_task - should a task be ignored for tracing * @filtered_pids: The list of pids to check + * @filtered_no_pids: The list of pids not to be traced * @task: The task that should be ignored if not filtered * * Checks if @task should be traced or not from @filtered_pids. @@ -780,7 +783,7 @@ u64 ftrace_now(int cpu) } /** - * tracing_is_enabled - Show if global_trace has been disabled + * tracing_is_enabled - Show if global_trace has been enabled * * Shows if the global trace has been enabled or not. 
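Throughout these tracing hunks the separate (unsigned long flags, int pc) pair is collapsed into a single unsigned int trace_ctx; as tracing_gen_ctx_irq_test() further down shows, the context flags end up in the upper 16 bits and the preempt count in the low byte. A small standalone model of that packing and unpacking (the flag values are illustrative, not the kernel's TRACE_FLAG_* constants):

#include <stdio.h>

#define CTX_IRQS_OFF	0x01u	/* illustrative flag values */
#define CTX_HARDIRQ	0x08u
#define CTX_SOFTIRQ	0x10u

/* Pack context flags and preempt count the way tracing_gen_ctx_*() does. */
static unsigned int gen_ctx(unsigned int flags, unsigned int preempt_count)
{
	return (flags << 16) | (preempt_count & 0xff);
}

static unsigned int ctx_flags(unsigned int trace_ctx)
{
	return trace_ctx >> 16;
}

static unsigned int ctx_preempt_count(unsigned int trace_ctx)
{
	return trace_ctx & 0xff;
}

int main(void)
{
	unsigned int ctx = gen_ctx(CTX_IRQS_OFF | CTX_HARDIRQ, 3);

	printf("flags=%#x preempt_count=%u\n",
	       ctx_flags(ctx), ctx_preempt_count(ctx));
	return 0;
}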
It uses the * mirror flag "buffer_disabled" to be used in fast paths such as for @@ -905,23 +908,23 @@ static inline void trace_access_lock_init(void) #ifdef CONFIG_STACKTRACE static void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs); + unsigned int trace_ctx, + int skip, struct pt_regs *regs); static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs); + unsigned int trace_ctx, + int skip, struct pt_regs *regs); #else static inline void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { } static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned long trace_ctx, + int skip, struct pt_regs *regs) { } @@ -929,24 +932,24 @@ static inline void ftrace_trace_stack(struct trace_array *tr, static __always_inline void trace_event_setup(struct ring_buffer_event *event, - int type, unsigned long flags, int pc) + int type, unsigned int trace_ctx) { struct trace_entry *ent = ring_buffer_event_data(event); - tracing_generic_entry_update(ent, type, flags, pc); + tracing_generic_entry_update(ent, type, trace_ctx); } static __always_inline struct ring_buffer_event * __trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct ring_buffer_event *event; event = ring_buffer_lock_reserve(buffer, len); if (event != NULL) - trace_event_setup(event, type, flags, pc); + trace_event_setup(event, type, trace_ctx); return event; } @@ -1007,25 +1010,22 @@ int __trace_puts(unsigned long ip, const char *str, int size) struct ring_buffer_event *event; struct trace_buffer *buffer; struct print_entry *entry; - unsigned long irq_flags; + unsigned int trace_ctx; int alloc; - int pc; if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) return 0; - pc = preempt_count(); - if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; alloc = sizeof(*entry) + size + 2; /* possible \n added */ - local_save_flags(irq_flags); + trace_ctx = tracing_gen_ctx(); buffer = global_trace.array_buffer.buffer; ring_buffer_nest_start(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - irq_flags, pc); + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, + trace_ctx); if (!event) { size = 0; goto out; @@ -1044,7 +1044,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); + ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); out: ring_buffer_nest_end(buffer); return size; @@ -1061,25 +1061,22 @@ int __trace_bputs(unsigned long ip, const char *str) struct ring_buffer_event *event; struct trace_buffer *buffer; struct bputs_entry *entry; - unsigned long irq_flags; + unsigned int trace_ctx; int size = sizeof(struct bputs_entry); int ret = 0; - int pc; if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) return 0; - pc = preempt_count(); - if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; - local_save_flags(irq_flags); + trace_ctx = tracing_gen_ctx(); buffer = global_trace.array_buffer.buffer; ring_buffer_nest_start(buffer); event = 
__trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - irq_flags, pc); + trace_ctx); if (!event) goto out; @@ -1088,7 +1085,7 @@ int __trace_bputs(unsigned long ip, const char *str) entry->str = str; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); + ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); ret = 1; out: @@ -2584,36 +2581,34 @@ enum print_line_t trace_handle_return(struct trace_seq *s) } EXPORT_SYMBOL_GPL(trace_handle_return); -void -tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, - unsigned long flags, int pc) +unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) { - struct task_struct *tsk = current; + unsigned int trace_flags = irqs_status; + unsigned int pc; - entry->preempt_count = pc & 0xff; - entry->pid = (tsk) ? tsk->pid : 0; - entry->type = type; - entry->flags = -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | -#else - TRACE_FLAG_IRQS_NOSUPPORT | -#endif - ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | - ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | - ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | - (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); + pc = preempt_count(); + + if (pc & NMI_MASK) + trace_flags |= TRACE_FLAG_NMI; + if (pc & HARDIRQ_MASK) + trace_flags |= TRACE_FLAG_HARDIRQ; + if (in_serving_softirq()) + trace_flags |= TRACE_FLAG_SOFTIRQ; + + if (tif_need_resched()) + trace_flags |= TRACE_FLAG_NEED_RESCHED; + if (test_preempt_need_resched()) + trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; + return (trace_flags << 16) | (pc & 0xff); } -EXPORT_SYMBOL_GPL(tracing_generic_entry_update); struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { - return __trace_buffer_lock_reserve(buffer, type, len, flags, pc); + return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); } DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); @@ -2733,7 +2728,7 @@ struct ring_buffer_event * trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, struct trace_event_file *trace_file, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct ring_buffer_event *entry; int val; @@ -2746,15 +2741,15 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, /* Try to use the per cpu buffer first */ val = this_cpu_inc_return(trace_buffered_event_cnt); if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) { - trace_event_setup(entry, type, flags, pc); + trace_event_setup(entry, type, trace_ctx); entry->array[0] = len; return entry; } this_cpu_dec(trace_buffered_event_cnt); } - entry = __trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); + entry = __trace_buffer_lock_reserve(*current_rb, type, len, + trace_ctx); /* * If tracing is off, but we have triggers enabled * we still need to look at the event data. 
Use the temp_buffer @@ -2763,8 +2758,8 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, */ if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { *current_rb = temp_buffer; - entry = __trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); + entry = __trace_buffer_lock_reserve(*current_rb, type, len, + trace_ctx); } return entry; } @@ -2850,7 +2845,7 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, fbuffer->event, fbuffer->entry, - fbuffer->flags, fbuffer->pc, fbuffer->regs); + fbuffer->trace_ctx, fbuffer->regs); } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); @@ -2866,7 +2861,7 @@ EXPORT_SYMBOL_GPL(trace_event_buffer_commit); void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc, + unsigned int trace_ctx, struct pt_regs *regs) { __buffer_unlock_commit(buffer, event); @@ -2877,8 +2872,8 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, * and mmiotrace, but that's ok if they lose a function or * two. They are not that meaningful. */ - ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); - ftrace_trace_userstack(tr, buffer, flags, pc); + ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); + ftrace_trace_userstack(tr, buffer, trace_ctx); } /* @@ -2892,9 +2887,8 @@ trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, } void -trace_function(struct trace_array *tr, - unsigned long ip, unsigned long parent_ip, unsigned long flags, - int pc) +trace_function(struct trace_array *tr, unsigned long ip, unsigned long + parent_ip, unsigned int trace_ctx) { struct trace_event_call *call = &event_function; struct trace_buffer *buffer = tr->array_buffer.buffer; @@ -2902,7 +2896,7 @@ trace_function(struct trace_array *tr, struct ftrace_entry *entry; event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), - flags, pc); + trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -2936,8 +2930,8 @@ static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); static void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { struct trace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; @@ -2984,7 +2978,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, size = nr_entries * sizeof(unsigned long); event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, - sizeof(*entry) + size, flags, pc); + sizeof(*entry) + size, trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3005,22 +2999,22 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { if (!(tr->trace_flags & TRACE_ITER_STACKTRACE)) return; - __ftrace_trace_stack(buffer, flags, skip, pc, regs); + __ftrace_trace_stack(buffer, trace_ctx, skip, regs); } -void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, - int pc) +void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, + int skip) { struct trace_buffer *buffer 
= tr->array_buffer.buffer; if (rcu_is_watching()) { - __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); return; } @@ -3034,7 +3028,7 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, return; rcu_irq_enter_irqson(); - __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); rcu_irq_exit_irqson(); } @@ -3044,19 +3038,15 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, */ void trace_dump_stack(int skip) { - unsigned long flags; - if (tracing_disabled || tracing_selftest_running) return; - local_save_flags(flags); - #ifndef CONFIG_UNWINDER_ORC /* Skip 1 to skip this function. */ skip++; #endif __ftrace_trace_stack(global_trace.array_buffer.buffer, - flags, skip, preempt_count(), NULL); + tracing_gen_ctx(), skip, NULL); } EXPORT_SYMBOL_GPL(trace_dump_stack); @@ -3065,7 +3055,7 @@ static DEFINE_PER_CPU(int, user_stack_count); static void ftrace_trace_userstack(struct trace_array *tr, - struct trace_buffer *buffer, unsigned long flags, int pc) + struct trace_buffer *buffer, unsigned int trace_ctx) { struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; @@ -3092,7 +3082,7 @@ ftrace_trace_userstack(struct trace_array *tr, __this_cpu_inc(user_stack_count); event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) goto out_drop_count; entry = ring_buffer_event_data(event); @@ -3112,7 +3102,7 @@ ftrace_trace_userstack(struct trace_array *tr, #else /* CONFIG_USER_STACKTRACE_SUPPORT */ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, int pc) + unsigned int trace_ctx) { } #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ @@ -3242,9 +3232,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) struct trace_buffer *buffer; struct trace_array *tr = &global_trace; struct bprint_entry *entry; - unsigned long flags; + unsigned int trace_ctx; char *tbuffer; - int len = 0, size, pc; + int len = 0, size; if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -3252,7 +3242,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); tbuffer = get_trace_buf(); @@ -3266,12 +3256,11 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) goto out_put; - local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->array_buffer.buffer; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3281,7 +3270,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) memcpy(entry->buf, tbuffer, sizeof(u32) * len); if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL); + ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); } out: @@ -3304,9 +3293,9 @@ __trace_array_vprintk(struct trace_buffer *buffer, { struct trace_event_call *call = &event_print; struct ring_buffer_event *event; - int len = 0, size, pc; + int len = 0, size; struct print_entry *entry; - unsigned long 
flags; + unsigned int trace_ctx; char *tbuffer; if (tracing_disabled || tracing_selftest_running) @@ -3315,7 +3304,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); @@ -3327,11 +3316,10 @@ __trace_array_vprintk(struct trace_buffer *buffer, len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); - local_save_flags(flags); size = sizeof(*entry) + len + 1; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3340,7 +3328,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, memcpy(&entry->buf, tbuffer, len + 1); if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL); + ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL); } out: @@ -3543,6 +3531,65 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, return next; } +#define STATIC_FMT_BUF_SIZE 128 +static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; + +static char *trace_iter_expand_format(struct trace_iterator *iter) +{ + char *tmp; + + if (iter->fmt == static_fmt_buf) + return NULL; + + tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, + GFP_KERNEL); + if (tmp) { + iter->fmt_size += STATIC_FMT_BUF_SIZE; + iter->fmt = tmp; + } + + return tmp; +} + +const char *trace_event_format(struct trace_iterator *iter, const char *fmt) +{ + const char *p, *new_fmt; + char *q; + + if (WARN_ON_ONCE(!fmt)) + return fmt; + + if (iter->tr->trace_flags & TRACE_ITER_HASH_PTR) + return fmt; + + p = fmt; + new_fmt = q = iter->fmt; + while (*p) { + if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { + if (!trace_iter_expand_format(iter)) + return fmt; + + q += iter->fmt - new_fmt; + new_fmt = iter->fmt; + } + + *q++ = *p++; + + /* Replace %p with %px */ + if (p[-1] == '%') { + if (p[0] == '%') { + *q++ = *p++; + } else if (p[0] == 'p' && !isalnum(p[1])) { + *q++ = *p++; + *q++ = 'x'; + } + } + } + *q = '\0'; + + return new_fmt; +} + #define STATIC_TEMP_BUF_SIZE 128 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); @@ -4336,6 +4383,16 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) iter->temp_size = 128; /* + * trace_event_printf() may need to modify given format + * string to replace %p with %px so that it shows real address + * instead of hash value. However, that is only for the event + * tracing, other tracer may not need. Defer the allocation + * until it is needed. + */ + iter->fmt = NULL; + iter->fmt_size = 0; + + /* * We make a copy of the current tracer to avoid concurrent * changes on it while we are reading. 
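trace_event_format() above rewrites an event's format string on the fly: with the new hash-ptr option cleared, every bare %p (but not an escaped %%p, and not a %p followed by an alphanumeric extension character) becomes %px so real addresses are printed. A self-contained rendition of that rewrite into a caller-supplied buffer; the kernel version grows iter->fmt instead, and the helper name here is invented:

#include <ctype.h>
#include <stdio.h>

/* Rewrite bare %p conversions to %px; returns fmt unchanged on overflow. */
static const char *expand_ptr_format(const char *fmt, char *out, size_t len)
{
	const char *p = fmt;
	char *q = out;

	while (*p) {
		/* Worst case we emit three bytes plus the terminator. */
		if ((size_t)(q - out) + 4 > len)
			return fmt;

		*q++ = *p++;

		if (p[-1] == '%') {
			if (p[0] == '%') {
				*q++ = *p++;		/* literal "%%" */
			} else if (p[0] == 'p' && !isalnum((unsigned char)p[1])) {
				*q++ = *p++;		/* copy the 'p'... */
				*q++ = 'x';		/* ...and force %px */
			}
		}
	}
	*q = '\0';

	return out;
}

int main(void)
{
	char buf[128];

	puts(expand_ptr_format("ptr=%p pct=100%% page=%pG", buf, sizeof(buf)));
	/* prints: ptr=%px pct=100%% page=%pG */
	return 0;
}

The early return on overflow mirrors the kernel's behaviour of falling back to the original format when the buffer cannot be grown.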
*/ @@ -4486,6 +4543,7 @@ static int tracing_release(struct inode *inode, struct file *file) mutex_destroy(&iter->mutex); free_cpumask_var(iter->started); + kfree(iter->fmt); kfree(iter->temp); kfree(iter->trace); kfree(iter->buffer_iter); @@ -6653,7 +6711,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, enum event_trigger_type tt = ETT_NONE; struct trace_buffer *buffer; struct print_entry *entry; - unsigned long irq_flags; ssize_t written; int size; int len; @@ -6673,7 +6730,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - local_save_flags(irq_flags); size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */ /* If less than "<faulted>", then make sure we can still add that */ @@ -6682,7 +6738,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - irq_flags, preempt_count()); + tracing_gen_ctx()); if (unlikely(!event)) /* Ring buffer disabled, return as if not open for write */ return -EBADF; @@ -6734,7 +6790,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, struct ring_buffer_event *event; struct trace_buffer *buffer; struct raw_data_entry *entry; - unsigned long irq_flags; ssize_t written; int size; int len; @@ -6756,14 +6811,13 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - local_save_flags(irq_flags); size = sizeof(*entry) + cnt; if (cnt < FAULT_SIZE_ID) size += FAULT_SIZE_ID - cnt; buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, - irq_flags, preempt_count()); + tracing_gen_ctx()); if (!event) /* Ring buffer disabled, return as if not open for write */ return -EBADF; @@ -9348,9 +9402,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* Simulate the iterator */ trace_init_global_iter(&iter); - /* Can not use kmalloc for iter.temp */ + /* Can not use kmalloc for iter.temp and iter.fmt */ iter.temp = static_temp_buf; iter.temp_size = STATIC_TEMP_BUF_SIZE; + iter.fmt = static_fmt_buf; + iter.fmt_size = STATIC_FMT_BUF_SIZE; for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); @@ -9429,30 +9485,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) } EXPORT_SYMBOL_GPL(ftrace_dump); -int trace_run_command(const char *buf, int (*createfn)(int, char **)) -{ - char **argv; - int argc, ret; - - argc = 0; - ret = 0; - argv = argv_split(GFP_KERNEL, buf, &argc); - if (!argv) - return -ENOMEM; - - if (argc) - ret = createfn(argc, argv); - - argv_free(argv); - - return ret; -} - #define WRITE_BUFSIZE 4096 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, - int (*createfn)(int, char **)) + int (*createfn)(const char *)) { char *kbuf, *buf, *tmp; int ret = 0; @@ -9500,7 +9537,7 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, if (tmp) *tmp = '\0'; - ret = trace_run_command(buf, createfn); + ret = createfn(buf); if (ret) goto out; buf += size; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e448d2da0b99..dec13ff66077 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -136,25 +136,6 @@ struct kretprobe_trace_entry_head { unsigned long ret_ip; }; -/* - * trace_flag_type is an enumeration that holds different - * states when a trace occurs. 
These are: - * IRQS_OFF - interrupts were disabled - * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags - * NEED_RESCHED - reschedule is requested - * HARDIRQ - inside an interrupt handler - * SOFTIRQ - inside a softirq handler - */ -enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, - TRACE_FLAG_IRQS_NOSUPPORT = 0x02, - TRACE_FLAG_NEED_RESCHED = 0x04, - TRACE_FLAG_HARDIRQ = 0x08, - TRACE_FLAG_SOFTIRQ = 0x10, - TRACE_FLAG_PREEMPT_RESCHED = 0x20, - TRACE_FLAG_NMI = 0x40, -}; - #define TRACE_BUF_SIZE 1024 struct trace_array; @@ -589,8 +570,7 @@ struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, - int pc); + unsigned int trace_ctx); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); @@ -601,6 +581,8 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); +const char *trace_event_format(struct trace_iterator *iter, const char *fmt); + int trace_empty(struct trace_iterator *iter); void *trace_find_next_entry_inc(struct trace_iterator *iter); @@ -615,11 +597,11 @@ unsigned long trace_total_entries(struct trace_array *tr); void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc); + unsigned int trace_ctx); void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc); + unsigned int trace_ctx); void trace_latency_header(struct seq_file *m); void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); @@ -687,11 +669,10 @@ static inline void latency_fsnotify(struct trace_array *tr) { } #endif #ifdef CONFIG_STACKTRACE -void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, - int pc); +void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, int skip); #else -static inline void __trace_stack(struct trace_array *tr, unsigned long flags, - int skip, int pc) +static inline void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, + int skip) { } #endif /* CONFIG_STACKTRACE */ @@ -831,10 +812,10 @@ extern void graph_trace_open(struct trace_iterator *iter); extern void graph_trace_close(struct trace_iterator *iter); extern int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, - unsigned long flags, int pc); + unsigned int trace_ctx); extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, - unsigned long flags, int pc); + unsigned int trace_ctx); #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; @@ -1194,6 +1175,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, C(MARKERS, "markers"), \ C(EVENT_FORK, "event-fork"), \ C(PAUSE_ON_TRACE, "pause-on-trace"), \ + C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \ FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ @@ -1297,15 +1279,15 @@ extern int call_filter_check_discard(struct trace_event_call *call, void *rec, void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc, + unsigned int trcace_ctx, struct pt_regs *regs); static inline void trace_buffer_unlock_commit(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long 
flags, int pc) + unsigned int trace_ctx) { - trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL); + trace_buffer_unlock_commit_regs(tr, buffer, event, trace_ctx, NULL); } DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); @@ -1366,8 +1348,7 @@ __event_trigger_test_discard(struct trace_event_file *file, * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. + * @trace_ctx: The tracing context flags. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and @@ -1377,12 +1358,12 @@ static inline void event_trigger_unlock_commit(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc) + void *entry, unsigned int trace_ctx) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); + trace_buffer_unlock_commit(file->tr, buffer, event, trace_ctx); if (tt) event_triggers_post_call(file, tt); @@ -1394,8 +1375,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. + * @trace_ctx: The tracing context flags. * * This is a helper function to handle triggers that require data * from the event itself. 
It also tests the event against filters and @@ -1408,14 +1388,14 @@ static inline void event_trigger_unlock_commit_regs(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc, + void *entry, unsigned int trace_ctx, struct pt_regs *regs) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) trace_buffer_unlock_commit_regs(file->tr, buffer, event, - irq_flags, pc, regs); + trace_ctx, regs); if (tt) event_triggers_post_call(file, tt); @@ -1830,10 +1810,9 @@ extern int tracing_set_cpumask(struct trace_array *tr, #define MAX_EVENT_NAME_LEN 64 -extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); extern ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, - int (*createfn)(int, char**)); + int (*createfn)(const char *)); extern unsigned int err_pos(char *cmd, const char *str); extern void tracing_log_err(struct trace_array *tr, diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index eff099123aa2..e47fdb4c92fb 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -37,7 +37,7 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) struct ring_buffer_event *event; struct trace_branch *entry; unsigned long flags; - int pc; + unsigned int trace_ctx; const char *p; if (current->trace_recursion & TRACE_BRANCH_BIT) @@ -59,10 +59,10 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) if (atomic_read(&data->disabled)) goto out; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(flags); buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) goto out; diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 4f967d5cd917..dc971a68dda4 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -31,23 +31,31 @@ int dyn_event_register(struct dyn_event_operations *ops) return 0; } -int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) +int dyn_event_release(const char *raw_command, struct dyn_event_operations *type) { struct dyn_event *pos, *n; char *system = NULL, *event, *p; - int ret = -ENOENT; + int argc, ret = -ENOENT; + char **argv; + + argv = argv_split(GFP_KERNEL, raw_command, &argc); + if (!argv) + return -ENOMEM; if (argv[0][0] == '-') { - if (argv[0][1] != ':') - return -EINVAL; + if (argv[0][1] != ':') { + ret = -EINVAL; + goto out; + } event = &argv[0][2]; } else { event = strchr(argv[0], ':'); - if (!event) - return -EINVAL; + if (!event) { + ret = -EINVAL; + goto out; + } event++; } - argc--; argv++; p = strchr(event, '/'); if (p) { @@ -63,7 +71,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) if (type && type != pos->ops) continue; if (!pos->ops->match(system, event, - argc, (const char **)argv, pos)) + argc - 1, (const char **)argv + 1, pos)) continue; ret = pos->ops->free(pos); @@ -71,21 +79,22 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) break; } mutex_unlock(&event_mutex); - +out: + argv_free(argv); return ret; } -static int create_dyn_event(int argc, char **argv) +static int create_dyn_event(const char *raw_command) { struct dyn_event_operations *ops; int ret = -ENODEV; - if (argv[0][0] == '-' || argv[0][0] == '!') - return 
dyn_event_release(argc, argv, NULL); + if (raw_command[0] == '-' || raw_command[0] == '!') + return dyn_event_release(raw_command, NULL); mutex_lock(&dyn_event_ops_mutex); list_for_each_entry(ops, &dyn_event_ops_list, list) { - ret = ops->create(argc, (const char **)argv); + ret = ops->create(raw_command); if (!ret || ret != -ECANCELED) break; } diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index d6f72dcb7269..7754936b57ee 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -39,7 +39,7 @@ struct dyn_event; */ struct dyn_event_operations { struct list_head list; - int (*create)(int argc, const char *argv[]); + int (*create)(const char *raw_command); int (*show)(struct seq_file *m, struct dyn_event *ev); bool (*is_busy)(struct dyn_event *ev); int (*free)(struct dyn_event *ev); @@ -97,7 +97,7 @@ void *dyn_event_seq_start(struct seq_file *m, loff_t *pos); void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos); void dyn_event_seq_stop(struct seq_file *m, void *v); int dyn_events_release_all(struct dyn_event_operations *type); -int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type); +int dyn_event_release(const char *raw_command, struct dyn_event_operations *type); /* * for_each_dyn_event - iterate over the dyn_event list diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a71181655958..288ad2c274fb 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -421,11 +421,8 @@ NOKPROBE_SYMBOL(perf_trace_buf_alloc); void perf_trace_buf_update(void *record, u16 type) { struct trace_entry *entry = record; - int pc = preempt_count(); - unsigned long flags; - local_save_flags(flags); - tracing_generic_entry_update(entry, type, flags, pc); + tracing_generic_entry_update(entry, type, tracing_gen_ctx()); } NOKPROBE_SYMBOL(perf_trace_buf_update); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d387b774ceeb..a3563afd412d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -258,22 +258,19 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, trace_event_ignore_this_pid(trace_file)) return NULL; - local_save_flags(fbuffer->flags); - fbuffer->pc = preempt_count(); /* * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables * preemption (adding one to the preempt_count). Since we are * interested in the preempt_count at the time the tracepoint was * hit, we need to subtract one to offset the increment. 
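With trace_run_command() removed, each dynamic-event createfn() now receives the raw command line and calls argv_split() itself, freeing the result on every exit path, as the dyn_event hunks above show. A compact userspace sketch of that shape, with a toy whitespace splitter standing in for argv_split() and purely illustrative command syntax:

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy stand-in for argv_split(): whitespace-split a copy of the command. */
static char **split_args(const char *raw, int *argc)
{
	char **argv = calloc(strlen(raw) / 2 + 2, sizeof(*argv));
	char *buf = argv ? strdup(raw) : NULL;
	char *save = NULL, *tok;

	*argc = 0;
	if (!argv || !buf) {
		free(argv);
		free(buf);
		return NULL;
	}
	argv[0] = buf;			/* keep the copy so it can be freed */
	for (tok = strtok_r(buf, " \t", &save); tok;
	     tok = strtok_r(NULL, " \t", &save))
		argv[(*argc)++ + 1] = tok;
	return argv;
}

static void free_args(char **argv)
{
	if (argv)
		free(argv[0]);
	free(argv);
}

/* The handler owns the parsing now, like create_dyn_event()'s new shape. */
static int create_event(const char *raw_command)
{
	int argc, ret = 0;
	char **argv = split_args(raw_command, &argc);

	if (!argv)
		return -1;
	if (argc == 0 || raw_command[0] == '-' || raw_command[0] == '!') {
		ret = -2;		/* would be the "release" path */
		goto out;
	}
	printf("event '%s' with %d argument(s)\n", argv[1], argc - 1);
out:
	free_args(argv);
	return ret;
}

int main(void)
{
	return create_event("p:myprobe do_sys_open dfd=$arg1") ? 1 : 0;
}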
*/ - if (IS_ENABLED(CONFIG_PREEMPTION)) - fbuffer->pc--; + fbuffer->trace_ctx = tracing_gen_ctx_dec(); fbuffer->trace_file = trace_file; fbuffer->event = trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, event_call->event.type, len, - fbuffer->flags, fbuffer->pc); + fbuffer->trace_ctx); if (!fbuffer->event) return NULL; @@ -2101,16 +2098,20 @@ event_subsystem_dir(struct trace_array *tr, const char *name, dir->subsystem = system; file->system = dir; - entry = tracefs_create_file("filter", 0644, dir->entry, dir, - &ftrace_subsystem_filter_fops); - if (!entry) { - kfree(system->filter); - system->filter = NULL; - pr_warn("Could not create tracefs '%s/filter' entry\n", name); - } + /* the ftrace system is special, do not create enable or filter files */ + if (strcmp(name, "ftrace") != 0) { - trace_create_file("enable", 0644, dir->entry, dir, - &ftrace_system_enable_fops); + entry = tracefs_create_file("filter", 0644, dir->entry, dir, + &ftrace_subsystem_filter_fops); + if (!entry) { + kfree(system->filter); + system->filter = NULL; + pr_warn("Could not create tracefs '%s/filter' entry\n", name); + } + + trace_create_file("enable", 0644, dir->entry, dir, + &ftrace_system_enable_fops); + } list_add(&dir->list, &tr->systems); @@ -3679,12 +3680,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, struct trace_buffer *buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; - unsigned long flags; + unsigned int trace_ctx; long disabled; int cpu; - int pc; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); @@ -3692,11 +3692,9 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, if (disabled != 1) goto out; - local_save_flags(flags); - event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file, TRACE_FN, sizeof(*entry), - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3704,7 +3702,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, entry->parent_ip = parent_ip; event_trigger_unlock_commit(&event_trace_file, buffer, event, - entry, flags, pc); + entry, trace_ctx); out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); preempt_enable_notrace(); diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c index 22bcf7c51d1e..c188045c5f97 100644 --- a/kernel/trace/trace_events_inject.c +++ b/kernel/trace/trace_events_inject.c @@ -192,7 +192,6 @@ static void *trace_alloc_entry(struct trace_event_call *call, int *size) static int parse_entry(char *str, struct trace_event_call *call, void **pentry) { struct ftrace_event_field *field; - unsigned long irq_flags; void *entry = NULL; int entry_size; u64 val = 0; @@ -203,9 +202,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry) if (!entry) return -ENOMEM; - local_save_flags(irq_flags); - tracing_generic_entry_update(entry, call->event.type, irq_flags, - preempt_count()); + tracing_generic_entry_update(entry, call->event.type, + tracing_gen_ctx()); while ((len = parse_field(str, call, &field, &val)) > 0) { if (is_function_field(field)) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 5a8bc0b421f1..2979a96595b4 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -23,13 +23,14 @@ #undef ERRORS #define ERRORS \ C(BAD_NAME, "Illegal name"), \ - 
C(CMD_INCOMPLETE, "Incomplete command"), \ + C(INVALID_CMD, "Command must be of the form: <name> field[;field] ..."),\ + C(INVALID_DYN_CMD, "Command must be of the form: s or -:[synthetic/]<name> field[;field] ..."),\ C(EVENT_EXISTS, "Event already exists"), \ C(TOO_MANY_FIELDS, "Too many fields"), \ C(INCOMPLETE_TYPE, "Incomplete type"), \ C(INVALID_TYPE, "Invalid type"), \ - C(INVALID_FIELD, "Invalid field"), \ - C(CMD_TOO_LONG, "Command too long"), + C(INVALID_FIELD, "Invalid field"), \ + C(INVALID_ARRAY_SPEC, "Invalid array specification"), #undef C #define C(a, b) SYNTH_ERR_##a @@ -48,7 +49,7 @@ static int errpos(const char *str) return err_pos(last_cmd, str); } -static void last_cmd_set(char *str) +static void last_cmd_set(const char *str) { if (!str) return; @@ -62,7 +63,7 @@ static void synth_err(u8 err_type, u8 err_pos) err_type, err_pos); } -static int create_synth_event(int argc, const char **argv); +static int create_synth_event(const char *raw_command); static int synth_event_show(struct seq_file *m, struct dyn_event *ev); static int synth_event_release(struct dyn_event *ev); static bool synth_event_is_busy(struct dyn_event *ev); @@ -579,18 +580,32 @@ static void free_synth_field(struct synth_field *field) kfree(field); } -static struct synth_field *parse_synth_field(int argc, const char **argv, - int *consumed) +static int check_field_version(const char *prefix, const char *field_type, + const char *field_name) +{ + /* + * For backward compatibility, the old synthetic event command + * format did not require semicolons, and in order to not + * break user space, that old format must still work. If a new + * feature is added, then the format that uses the new feature + * will be required to have semicolons, as nothing that uses + * the old format would be using the new, yet to be created, + * feature. When a new feature is added, this will detect it, + * and return a number greater than 1, and require the format + * to use semicolons. 
+ */ + return 1; +} + +static struct synth_field *parse_synth_field(int argc, char **argv, + int *consumed, int *field_version) { - struct synth_field *field; const char *prefix = NULL, *field_type = argv[0], *field_name, *array; + struct synth_field *field; int len, ret = -ENOMEM; struct seq_buf s; ssize_t size; - if (field_type[0] == ';') - field_type++; - if (!strcmp(field_type, "unsigned")) { if (argc < 3) { synth_err(SYNTH_ERR_INCOMPLETE_TYPE, errpos(field_type)); @@ -599,12 +614,19 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, prefix = "unsigned "; field_type = argv[1]; field_name = argv[2]; - *consumed = 3; + *consumed += 3; } else { field_name = argv[1]; - *consumed = 2; + *consumed += 2; } + if (!field_name) { + synth_err(SYNTH_ERR_INVALID_FIELD, errpos(field_type)); + return ERR_PTR(-EINVAL); + } + + *field_version = check_field_version(prefix, field_type, field_name); + field = kzalloc(sizeof(*field), GFP_KERNEL); if (!field) return ERR_PTR(-ENOMEM); @@ -613,8 +635,6 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, array = strchr(field_name, '['); if (array) len -= strlen(array); - else if (field_name[len - 1] == ';') - len--; field->name = kmemdup_nul(field_name, len, GFP_KERNEL); if (!field->name) @@ -626,8 +646,6 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, goto free; } - if (field_type[0] == ';') - field_type++; len = strlen(field_type) + 1; if (array) @@ -644,11 +662,8 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, if (prefix) seq_buf_puts(&s, prefix); seq_buf_puts(&s, field_type); - if (array) { + if (array) seq_buf_puts(&s, array); - if (s.buffer[s.len - 1] == ';') - s.len--; - } if (WARN_ON_ONCE(!seq_buf_buffer_left(&s))) goto free; @@ -656,7 +671,10 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, size = synth_field_size(field->type); if (size < 0) { - synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type)); + if (array) + synth_err(SYNTH_ERR_INVALID_ARRAY_SPEC, errpos(field_name)); + else + synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type)); ret = -EINVAL; goto free; } else if (size == 0) { @@ -1160,46 +1178,13 @@ int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name, } EXPORT_SYMBOL_GPL(synth_event_gen_cmd_array_start); -static int save_cmdstr(int argc, const char *name, const char **argv) -{ - struct seq_buf s; - char *buf; - int i; - - buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - seq_buf_init(&s, buf, MAX_DYNEVENT_CMD_LEN); - - seq_buf_puts(&s, name); - - for (i = 0; i < argc; i++) { - seq_buf_putc(&s, ' '); - seq_buf_puts(&s, argv[i]); - } - - if (!seq_buf_buffer_left(&s)) { - synth_err(SYNTH_ERR_CMD_TOO_LONG, 0); - kfree(buf); - return -EINVAL; - } - buf[s.len] = 0; - last_cmd_set(buf); - - kfree(buf); - return 0; -} - -static int __create_synth_event(int argc, const char *name, const char **argv) +static int __create_synth_event(const char *name, const char *raw_fields) { + char **argv, *field_str, *tmp_fields, *saved_fields = NULL; struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; + int consumed, cmd_version = 1, n_fields_this_loop; + int i, argc, n_fields = 0, ret = 0; struct synth_event *event = NULL; - int i, consumed = 0, n_fields = 0, ret = 0; - - ret = save_cmdstr(argc, name, argv); - if (ret) - return ret; /* * Argument syntax: @@ -1208,46 +1193,99 @@ static int __create_synth_event(int argc, const char *name, const char **argv) * where 
'field' = type field_name */ - if (name[0] == '\0' || argc < 1) { - synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); + if (name[0] == '\0') { + synth_err(SYNTH_ERR_INVALID_CMD, 0); return -EINVAL; } - mutex_lock(&event_mutex); - if (!is_good_name(name)) { synth_err(SYNTH_ERR_BAD_NAME, errpos(name)); - ret = -EINVAL; - goto out; + return -EINVAL; } + mutex_lock(&event_mutex); + event = find_synth_event(name); if (event) { synth_err(SYNTH_ERR_EVENT_EXISTS, errpos(name)); ret = -EEXIST; - goto out; + goto err; } - for (i = 0; i < argc - 1; i++) { - if (strcmp(argv[i], ";") == 0) - continue; - if (n_fields == SYNTH_FIELDS_MAX) { - synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); - ret = -EINVAL; + tmp_fields = saved_fields = kstrdup(raw_fields, GFP_KERNEL); + if (!tmp_fields) { + ret = -ENOMEM; + goto err; + } + + while ((field_str = strsep(&tmp_fields, ";")) != NULL) { + argv = argv_split(GFP_KERNEL, field_str, &argc); + if (!argv) { + ret = -ENOMEM; goto err; } - field = parse_synth_field(argc - i, &argv[i], &consumed); - if (IS_ERR(field)) { - ret = PTR_ERR(field); + if (!argc) + continue; + + n_fields_this_loop = 0; + consumed = 0; + while (argc > consumed) { + int field_version; + + field = parse_synth_field(argc - consumed, + argv + consumed, &consumed, + &field_version); + if (IS_ERR(field)) { + argv_free(argv); + ret = PTR_ERR(field); + goto err; + } + + /* + * Track the highest version of any field we + * found in the command. + */ + if (field_version > cmd_version) + cmd_version = field_version; + + /* + * Now sort out what is and isn't valid for + * each supported version. + * + * If we see more than 1 field per loop, it + * means we have multiple fields between + * semicolons, and that's something we no + * longer support in a version 2 or greater + * command. 
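For reference, the command shapes this loop distinguishes look roughly as follows (event and field names here are only illustrative; the general syntax comes from the INVALID_CMD/INVALID_DYN_CMD messages above):

/*
 * Old-style (version 1) command, still accepted - semicolons are
 * optional and several fields may sit between separators:
 *
 *     wakeup_lat pid_t pid u64 delta
 *
 * Semicolon-delimited command - exactly one field per segment, which
 * becomes mandatory as soon as any field bumps cmd_version above 1:
 *
 *     wakeup_lat pid_t pid; u64 delta;
 *
 * (Via the dynamic_events interface the same command carries an "s:"
 * prefix, e.g. "s:wakeup_lat pid_t pid; u64 delta;".)
 */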
+ */ + if (cmd_version > 1 && n_fields_this_loop >= 1) { + synth_err(SYNTH_ERR_INVALID_CMD, errpos(field_str)); + ret = -EINVAL; + goto err; + } + + fields[n_fields++] = field; + if (n_fields == SYNTH_FIELDS_MAX) { + synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); + ret = -EINVAL; + goto err; + } + + n_fields_this_loop++; + } + + if (consumed < argc) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + ret = -EINVAL; goto err; } - fields[n_fields++] = field; - i += consumed - 1; + + argv_free(argv); } - if (i < argc && strcmp(argv[i], ";") != 0) { - synth_err(SYNTH_ERR_INVALID_FIELD, errpos(argv[i])); + if (n_fields == 0) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); ret = -EINVAL; goto err; } @@ -1266,6 +1304,8 @@ static int __create_synth_event(int argc, const char *name, const char **argv) out: mutex_unlock(&event_mutex); + kfree(saved_fields); + return ret; err: for (i = 0; i < n_fields; i++) @@ -1383,19 +1423,79 @@ int synth_event_delete(const char *event_name) } EXPORT_SYMBOL_GPL(synth_event_delete); -static int create_or_delete_synth_event(int argc, char **argv) +static int check_command(const char *raw_command) { - const char *name = argv[0]; - int ret; + char **argv = NULL, *cmd, *saved_cmd, *name_and_field; + int argc, ret = 0; + + cmd = saved_cmd = kstrdup(raw_command, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + name_and_field = strsep(&cmd, ";"); + if (!name_and_field) { + ret = -EINVAL; + goto free; + } + + if (name_and_field[0] == '!') + goto free; + + argv = argv_split(GFP_KERNEL, name_and_field, &argc); + if (!argv) { + ret = -ENOMEM; + goto free; + } + argv_free(argv); + + if (argc < 3) + ret = -EINVAL; +free: + kfree(saved_cmd); + + return ret; +} + +static int create_or_delete_synth_event(const char *raw_command) +{ + char *name = NULL, *fields, *p; + int ret = 0; + + raw_command = skip_spaces(raw_command); + if (raw_command[0] == '\0') + return ret; + + last_cmd_set(raw_command); + + ret = check_command(raw_command); + if (ret) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + return ret; + } + + p = strpbrk(raw_command, " \t"); + if (!p && raw_command[0] != '!') { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + ret = -EINVAL; + goto free; + } + + name = kmemdup_nul(raw_command, p ? p - raw_command : strlen(raw_command), GFP_KERNEL); + if (!name) + return -ENOMEM; - /* trace_run_command() ensures argc != 0 */ if (name[0] == '!') { ret = synth_event_delete(name + 1); - return ret; + goto free; } - ret = __create_synth_event(argc - 1, name, (const char **)argv + 1); - return ret == -ECANCELED ? 
-EINVAL : ret; + fields = skip_spaces(p); + + ret = __create_synth_event(name, fields); +free: + kfree(name); + + return ret; } static int synth_event_run_command(struct dynevent_cmd *cmd) @@ -1403,7 +1503,7 @@ static int synth_event_run_command(struct dynevent_cmd *cmd) struct synth_event *se; int ret; - ret = trace_run_command(cmd->seq.buffer, create_or_delete_synth_event); + ret = create_or_delete_synth_event(cmd->seq.buffer); if (ret) return ret; @@ -1939,10 +2039,27 @@ int synth_event_trace_end(struct synth_event_trace_state *trace_state) } EXPORT_SYMBOL_GPL(synth_event_trace_end); -static int create_synth_event(int argc, const char **argv) +static int create_synth_event(const char *raw_command) { - const char *name = argv[0]; - int len; + char *fields, *p; + const char *name; + int len, ret = 0; + + raw_command = skip_spaces(raw_command); + if (raw_command[0] == '\0') + return ret; + + last_cmd_set(raw_command); + + p = strpbrk(raw_command, " \t"); + if (!p) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + return -EINVAL; + } + + fields = skip_spaces(p); + + name = raw_command; if (name[0] != 's' || name[1] != ':') return -ECANCELED; @@ -1951,11 +2068,30 @@ static int create_synth_event(int argc, const char **argv) /* This interface accepts group name prefix */ if (strchr(name, '/')) { len = str_has_prefix(name, SYNTH_SYSTEM "/"); - if (len == 0) + if (len == 0) { + synth_err(SYNTH_ERR_INVALID_DYN_CMD, 0); return -EINVAL; + } name += len; } - return __create_synth_event(argc - 1, name, argv + 1); + + len = name - raw_command; + + ret = check_command(raw_command + len); + if (ret) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + return ret; + } + + name = kmemdup_nul(raw_command + len, p - raw_command - len, GFP_KERNEL); + if (!name) + return -ENOMEM; + + ret = __create_synth_event(name, fields); + + kfree(name); + + return ret; } static int synth_event_release(struct dyn_event *ev) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c5095dd28e20..f93723ca66bc 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -106,8 +106,7 @@ static int function_trace_init(struct trace_array *tr) ftrace_init_array_ops(tr, func); - tr->array_buffer.cpu = get_cpu(); - put_cpu(); + tr->array_buffer.cpu = raw_smp_processor_id(); tracing_start_cmdline_record(); tracing_start_function_trace(tr); @@ -132,10 +131,9 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, { struct trace_array *tr = op->private; struct trace_array_cpu *data; - unsigned long flags; + unsigned int trace_ctx; int bit; int cpu; - int pc; if (unlikely(!tr->function_enabled)) return; @@ -144,15 +142,14 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); - if (!atomic_read(&data->disabled)) { - local_save_flags(flags); - trace_function(tr, ip, parent_ip, flags, pc); - } + if (!atomic_read(&data->disabled)) + trace_function(tr, ip, parent_ip, trace_ctx); + ftrace_test_recursion_unlock(bit); preempt_enable_notrace(); } @@ -184,7 +181,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, unsigned long flags; long disabled; int cpu; - int pc; + unsigned int trace_ctx; if (unlikely(!tr->function_enabled)) return; @@ -199,9 +196,9 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 
1)) { - pc = preempt_count(); - trace_function(tr, ip, parent_ip, flags, pc); - __trace_stack(tr, flags, STACK_SKIP, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + trace_function(tr, ip, parent_ip, trace_ctx); + __trace_stack(tr, trace_ctx, STACK_SKIP); } atomic_dec(&data->disabled); @@ -404,13 +401,11 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip, static __always_inline void trace_stack(struct trace_array *tr) { - unsigned long flags; - int pc; + unsigned int trace_ctx; - local_save_flags(flags); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); - __trace_stack(tr, flags, FTRACE_STACK_SKIP, pc); + __trace_stack(tr, trace_ctx, FTRACE_STACK_SKIP); } static void diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d874dec87131..0aa6e6faa943 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -96,8 +96,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, - unsigned long flags, - int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; @@ -105,7 +104,7 @@ int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return 0; entry = ring_buffer_event_data(event); @@ -129,10 +128,10 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) struct trace_array *tr = graph_array; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; long disabled; int ret; int cpu; - int pc; if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) return 0; @@ -174,8 +173,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - pc = preempt_count(); - ret = __trace_graph_entry(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + ret = __trace_graph_entry(tr, trace, trace_ctx); } else { ret = 0; } @@ -188,7 +187,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) static void __trace_graph_function(struct trace_array *tr, - unsigned long ip, unsigned long flags, int pc) + unsigned long ip, unsigned int trace_ctx) { u64 time = trace_clock_local(); struct ftrace_graph_ent ent = { @@ -202,22 +201,21 @@ __trace_graph_function(struct trace_array *tr, .rettime = time, }; - __trace_graph_entry(tr, &ent, flags, pc); - __trace_graph_return(tr, &ret, flags, pc); + __trace_graph_entry(tr, &ent, trace_ctx); + __trace_graph_return(tr, &ret, trace_ctx); } void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc) + unsigned int trace_ctx) { - __trace_graph_function(tr, ip, flags, pc); + __trace_graph_function(tr, ip, trace_ctx); } void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, - unsigned long flags, - int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; @@ -225,7 +223,7 @@ void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -239,9 +237,9 @@ void 
trace_graph_return(struct ftrace_graph_ret *trace) struct trace_array *tr = graph_array; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; long disabled; int cpu; - int pc; ftrace_graph_addr_finish(trace); @@ -255,8 +253,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace) data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - pc = preempt_count(); - __trace_graph_return(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + __trace_graph_return(tr, trace, trace_ctx); } atomic_dec(&data->disabled); local_irq_restore(flags); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index c0df9b97f147..34dc1a712dcb 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -108,14 +108,9 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct hwlat_entry *entry; - unsigned long flags; - int pc; - - pc = preempt_count(); - local_save_flags(flags); event = trace_buffer_lock_reserve(buffer, TRACE_HWLAT, sizeof(*entry), - flags, pc); + tracing_gen_ctx()); if (!event) return; entry = ring_buffer_event_data(event); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 6756379b661f..590b3d51afae 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -143,11 +143,14 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; if (!func_prolog_dec(tr, &data, &flags)) return; - trace_function(tr, ip, parent_ip, flags, preempt_count()); + trace_ctx = tracing_gen_ctx_flags(flags); + + trace_function(tr, ip, parent_ip, trace_ctx); atomic_dec(&data->disabled); } @@ -177,8 +180,8 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; int ret; - int pc; if (ftrace_graph_ignore_func(trace)) return 0; @@ -195,8 +198,8 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) if (!func_prolog_dec(tr, &data, &flags)) return 0; - pc = preempt_count(); - ret = __trace_graph_entry(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + ret = __trace_graph_entry(tr, trace, trace_ctx); atomic_dec(&data->disabled); return ret; @@ -207,15 +210,15 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace) struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; - int pc; + unsigned int trace_ctx; ftrace_graph_addr_finish(trace); if (!func_prolog_dec(tr, &data, &flags)) return; - pc = preempt_count(); - __trace_graph_return(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + __trace_graph_return(tr, trace, trace_ctx); atomic_dec(&data->disabled); } @@ -267,12 +270,12 @@ static void irqsoff_print_header(struct seq_file *s) static void __trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc) + unsigned int trace_ctx) { if (is_graph(tr)) - trace_graph_function(tr, ip, parent_ip, flags, pc); + trace_graph_function(tr, ip, parent_ip, trace_ctx); else - trace_function(tr, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, trace_ctx); } #else @@ -322,15 +325,13 @@ check_critical_timing(struct trace_array *tr, { u64 T0, T1, delta; unsigned 
long flags; - int pc; + unsigned int trace_ctx; T0 = data->preempt_timestamp; T1 = ftrace_now(cpu); delta = T1-T0; - local_save_flags(flags); - - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); if (!report_latency(tr, delta)) goto out; @@ -341,9 +342,9 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(tr, delta)) goto out_unlock; - __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, trace_ctx); /* Skip 5 functions to get to the irq/preempt enable function */ - __trace_stack(tr, flags, 5, pc); + __trace_stack(tr, trace_ctx, 5); if (data->critical_sequence != max_sequence) goto out_unlock; @@ -363,16 +364,15 @@ out_unlock: out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); - __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, trace_ctx); } static nokprobe_inline void -start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) +start_critical_timing(unsigned long ip, unsigned long parent_ip) { int cpu; struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; - unsigned long flags; if (!tracer_enabled || !tracing_is_enabled()) return; @@ -393,9 +393,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) data->preempt_timestamp = ftrace_now(cpu); data->critical_start = parent_ip ? : ip; - local_save_flags(flags); - - __trace_function(tr, ip, parent_ip, flags, pc); + __trace_function(tr, ip, parent_ip, tracing_gen_ctx()); per_cpu(tracing_cpu, cpu) = 1; @@ -403,12 +401,12 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) } static nokprobe_inline void -stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) +stop_critical_timing(unsigned long ip, unsigned long parent_ip) { int cpu; struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; - unsigned long flags; + unsigned int trace_ctx; cpu = raw_smp_processor_id(); /* Always clear the tracing cpu on stopping the trace */ @@ -428,8 +426,8 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) atomic_inc(&data->disabled); - local_save_flags(flags); - __trace_function(tr, ip, parent_ip, flags, pc); + trace_ctx = tracing_gen_ctx(); + __trace_function(tr, ip, parent_ip, trace_ctx); check_critical_timing(tr, data, parent_ip ? 
: ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); @@ -438,20 +436,16 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) /* start and stop critical timings used to for stoppage (in idle) */ void start_critical_timings(void) { - int pc = preempt_count(); - - if (preempt_trace(pc) || irq_trace()) - start_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); + if (preempt_trace(preempt_count()) || irq_trace()) + start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL_GPL(start_critical_timings); NOKPROBE_SYMBOL(start_critical_timings); void stop_critical_timings(void) { - int pc = preempt_count(); - - if (preempt_trace(pc) || irq_trace()) - stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); + if (preempt_trace(preempt_count()) || irq_trace()) + stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL_GPL(stop_critical_timings); NOKPROBE_SYMBOL(stop_critical_timings); @@ -613,19 +607,15 @@ static void irqsoff_tracer_stop(struct trace_array *tr) */ void tracer_hardirqs_on(unsigned long a0, unsigned long a1) { - unsigned int pc = preempt_count(); - - if (!preempt_trace(pc) && irq_trace()) - stop_critical_timing(a0, a1, pc); + if (!preempt_trace(preempt_count()) && irq_trace()) + stop_critical_timing(a0, a1); } NOKPROBE_SYMBOL(tracer_hardirqs_on); void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { - unsigned int pc = preempt_count(); - - if (!preempt_trace(pc) && irq_trace()) - start_critical_timing(a0, a1, pc); + if (!preempt_trace(preempt_count()) && irq_trace()) + start_critical_timing(a0, a1); } NOKPROBE_SYMBOL(tracer_hardirqs_off); @@ -665,18 +655,14 @@ static struct tracer irqsoff_tracer __read_mostly = #ifdef CONFIG_PREEMPT_TRACER void tracer_preempt_on(unsigned long a0, unsigned long a1) { - int pc = preempt_count(); - - if (preempt_trace(pc) && !irq_trace()) - stop_critical_timing(a0, a1, pc); + if (preempt_trace(preempt_count()) && !irq_trace()) + stop_critical_timing(a0, a1); } void tracer_preempt_off(unsigned long a0, unsigned long a1) { - int pc = preempt_count(); - - if (preempt_trace(pc) && !irq_trace()) - start_critical_timing(a0, a1, pc); + if (preempt_trace(preempt_count()) && !irq_trace()) + start_critical_timing(a0, a1); } static int preemptoff_tracer_init(struct trace_array *tr) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 56c7fbff7bd7..6fe770d86dc3 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -35,7 +35,7 @@ static int __init set_kprobe_boot_events(char *str) } __setup("kprobe_event=", set_kprobe_boot_events); -static int trace_kprobe_create(int argc, const char **argv); +static int trace_kprobe_create(const char *raw_command); static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_kprobe_release(struct dyn_event *ev); static bool trace_kprobe_is_busy(struct dyn_event *ev); @@ -124,9 +124,9 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk) if (!p) return true; *p = '\0'; - mutex_lock(&module_mutex); + rcu_read_lock_sched(); ret = !!find_module(tk->symbol); - mutex_unlock(&module_mutex); + rcu_read_unlock_sched(); *p = ':'; return ret; @@ -711,7 +711,7 @@ static inline void sanitize_event_name(char *name) *name = '_'; } -static int trace_kprobe_create(int argc, const char *argv[]) +static int __trace_kprobe_create(int argc, const char *argv[]) { /* * Argument syntax: @@ -910,20 +910,25 @@ error: goto out; } -static int create_or_delete_trace_kprobe(int argc, char **argv) +static 
int trace_kprobe_create(const char *raw_command) +{ + return trace_probe_create(raw_command, __trace_kprobe_create); +} + +static int create_or_delete_trace_kprobe(const char *raw_command) { int ret; - if (argv[0][0] == '-') - return dyn_event_release(argc, argv, &trace_kprobe_ops); + if (raw_command[0] == '-') + return dyn_event_release(raw_command, &trace_kprobe_ops); - ret = trace_kprobe_create(argc, (const char **)argv); + ret = trace_kprobe_create(raw_command); return ret == -ECANCELED ? -EINVAL : ret; } static int trace_kprobe_run_command(struct dynevent_cmd *cmd) { - return trace_run_command(cmd->seq.buffer, create_or_delete_trace_kprobe); + return create_or_delete_trace_kprobe(cmd->seq.buffer); } /** @@ -1084,7 +1089,7 @@ int kprobe_event_delete(const char *name) snprintf(buf, MAX_EVENT_NAME_LEN, "-:%s", name); - return trace_run_command(buf, create_or_delete_trace_kprobe); + return create_or_delete_trace_kprobe(buf); } EXPORT_SYMBOL_GPL(kprobe_event_delete); @@ -1386,8 +1391,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(fbuffer.flags); - fbuffer.pc = preempt_count(); + fbuffer.trace_ctx = tracing_gen_ctx(); fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); @@ -1396,7 +1400,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, call->event.type, sizeof(*entry) + tk->tp.size + dsize, - fbuffer.flags, fbuffer.pc); + fbuffer.trace_ctx); if (!fbuffer.event) return; @@ -1434,8 +1438,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(fbuffer.flags); - fbuffer.pc = preempt_count(); + fbuffer.trace_ctx = tracing_gen_ctx(); fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); @@ -1443,7 +1446,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, call->event.type, sizeof(*entry) + tk->tp.size + dsize, - fbuffer.flags, fbuffer.pc); + fbuffer.trace_ctx); if (!fbuffer.event) return; @@ -1888,7 +1891,7 @@ static __init void setup_boot_kprobe_events(void) if (p) *p++ = '\0'; - ret = trace_run_command(cmd, create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe(cmd); if (ret) pr_warn("Failed to add event(%d): %s\n", ret, cmd); @@ -1982,8 +1985,7 @@ static __init int kprobe_trace_self_tests_init(void) pr_info("Testing kprobe tracing: "); - ret = trace_run_command("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)", - create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)"); if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function entry.\n"); warn++; @@ -2004,8 +2006,7 @@ static __init int kprobe_trace_self_tests_init(void) } } - ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target $retval", - create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("r:testprobe2 kprobe_trace_selftest_target $retval"); if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function return.\n"); warn++; @@ -2078,13 +2079,13 @@ static __init int kprobe_trace_self_tests_init(void) trace_probe_event_call(&tk->tp), file); } - ret = trace_run_command("-:testprobe", create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("-:testprobe"); if (WARN_ON_ONCE(ret)) { 
pr_warn("error on deleting a probe.\n"); warn++; } - ret = trace_run_command("-:testprobe2", create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("-:testprobe2"); if (WARN_ON_ONCE(ret)) { pr_warn("error on deleting a probe.\n"); warn++; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 84582bf1ed5f..64e77b513697 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -5,8 +5,6 @@ * Copyright (C) 2008 Pekka Paalanen <pq@iki.fi> */ -#define DEBUG 1 - #include <linux/kernel.h> #include <linux/mmiotrace.h> #include <linux/pci.h> @@ -300,10 +298,11 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; - int pc = preempt_count(); + unsigned int trace_ctx; + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW, - sizeof(*entry), 0, pc); + sizeof(*entry), trace_ctx); if (!event) { atomic_inc(&dropped_count); return; @@ -312,7 +311,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, entry->rw = *rw; if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } void mmio_trace_rw(struct mmiotrace_rw *rw) @@ -330,10 +329,11 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; - int pc = preempt_count(); + unsigned int trace_ctx; + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP, - sizeof(*entry), 0, pc); + sizeof(*entry), trace_ctx); if (!event) { atomic_inc(&dropped_count); return; @@ -342,7 +342,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, entry->map = *map; if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } void mmio_trace_mapping(struct mmiotrace_map *map) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 92b1575ae0ca..61255bad7e01 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -312,13 +312,23 @@ int trace_raw_output_prep(struct trace_iterator *iter, } EXPORT_SYMBOL(trace_raw_output_prep); +void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + trace_seq_vprintf(&iter->seq, trace_event_format(iter, fmt), ap); + va_end(ap); +} +EXPORT_SYMBOL(trace_event_printf); + static int trace_output_raw(struct trace_iterator *iter, char *name, char *fmt, va_list ap) { struct trace_seq *s = &iter->seq; trace_seq_printf(s, "%s: ", name); - trace_seq_vprintf(s, fmt, ap); + trace_seq_vprintf(s, trace_event_format(iter, fmt), ap); return trace_handle_return(s); } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index d2867ccc6aca..ec589a4612df 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1134,3 +1134,20 @@ bool trace_probe_match_command_args(struct trace_probe *tp, } return true; } + +int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **)) +{ + int argc = 0, ret = 0; + char **argv; + + argv = argv_split(GFP_KERNEL, raw_command, &argc); + if (!argv) + return -ENOMEM; + + if (argc) + ret = createfn(argc, (const char **)argv); + + argv_free(argv); + + return ret; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 2f703a20c724..7ce4027089ee 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -341,6 +341,7 @@ struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b); bool trace_probe_match_command_args(struct trace_probe *tp, int argc, const char **argv); +int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **)); #define trace_probe_for_each_link(pos, tp) \ list_for_each_entry(pos, &(tp)->event->files, list) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index c0181066dbe9..e5778d1d7a5b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -67,7 +67,7 @@ static bool function_enabled; static int func_prolog_preempt_disable(struct trace_array *tr, struct trace_array_cpu **data, - int *pc) + unsigned int *trace_ctx) { long disabled; int cpu; @@ -75,7 +75,7 @@ func_prolog_preempt_disable(struct trace_array *tr, if (likely(!wakeup_task)) return 0; - *pc = preempt_count(); + *trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = raw_smp_processor_id(); @@ -116,8 +116,8 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; - unsigned long flags; - int pc, ret = 0; + unsigned int trace_ctx; + int ret = 0; if (ftrace_graph_ignore_func(trace)) return 0; @@ -131,11 +131,10 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) if (ftrace_graph_notrace_addr(trace->func)) return 1; - if (!func_prolog_preempt_disable(tr, &data, &pc)) + if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return 0; - local_save_flags(flags); - ret = __trace_graph_entry(tr, trace, flags, pc); + ret = __trace_graph_entry(tr, trace, trace_ctx); atomic_dec(&data->disabled); preempt_enable_notrace(); @@ -146,16 +145,14 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; - unsigned long flags; - int pc; + unsigned int trace_ctx; ftrace_graph_addr_finish(trace); - if (!func_prolog_preempt_disable(tr, &data, &pc)) + if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return; - local_save_flags(flags); - __trace_graph_return(tr, trace, flags, pc); + __trace_graph_return(tr, trace, trace_ctx); atomic_dec(&data->disabled); 
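Stepping back to the trace_probe_create() helper added in trace_probe.c above: it centralizes the argv_split() handling that each probe type used to get through trace_run_command(). A minimal sketch of how a probe type plugs into it, mirroring what trace_kprobe.c and trace_uprobe.c do elsewhere in this diff (the "example" names are invented):

/* old-style argc/argv parser, kept as-is behind the new entry point */
static int __example_probe_create(int argc, const char *argv[])
{
        if (argc < 2)
                return -EINVAL;
        /* ... parse argv[] and register the probe ... */
        return 0;
}

/* raw-command entry point, as used by the converted create callbacks */
static int example_probe_create(const char *raw_command)
{
        return trace_probe_create(raw_command, __example_probe_create);
}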
preempt_enable_notrace(); @@ -217,13 +214,13 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; unsigned long flags; - int pc; + unsigned int trace_ctx; - if (!func_prolog_preempt_disable(tr, &data, &pc)) + if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return; local_irq_save(flags); - trace_function(tr, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, trace_ctx); local_irq_restore(flags); atomic_dec(&data->disabled); @@ -303,12 +300,12 @@ static void wakeup_print_header(struct seq_file *s) static void __trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc) + unsigned int trace_ctx) { if (is_graph(tr)) - trace_graph_function(tr, ip, parent_ip, flags, pc); + trace_graph_function(tr, ip, parent_ip, trace_ctx); else - trace_function(tr, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, trace_ctx); } static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set) @@ -375,7 +372,7 @@ static void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *prev, struct task_struct *next, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_context_switch; struct trace_buffer *buffer = tr->array_buffer.buffer; @@ -383,7 +380,7 @@ tracing_sched_switch_trace(struct trace_array *tr, struct ctx_switch_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_CTX, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -396,14 +393,14 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_cpu = task_cpu(next); if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, flags, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } static void tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *wakee, struct task_struct *curr, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_wakeup; struct ring_buffer_event *event; @@ -411,7 +408,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_buffer *buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -424,7 +421,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, flags, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } static void notrace @@ -436,7 +433,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, unsigned long flags; long disabled; int cpu; - int pc; + unsigned int trace_ctx; tracing_record_cmdline(prev); @@ -455,8 +452,6 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, if (next != wakeup_task) return; - pc = preempt_count(); - /* disable local data, not wakeup_cpu data */ cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); @@ -464,6 +459,8 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, goto out; local_irq_save(flags); + trace_ctx = tracing_gen_ctx_flags(flags); + arch_spin_lock(&wakeup_lock); /* We could race with grabbing wakeup_lock */ @@ -473,9 +470,9 @@ 
probe_wakeup_sched_switch(void *ignore, bool preempt, /* The task we are waiting for is waking up */ data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); - __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); - tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); - __trace_stack(wakeup_trace, flags, 0, pc); + __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, trace_ctx); + tracing_sched_switch_trace(wakeup_trace, prev, next, trace_ctx); + __trace_stack(wakeup_trace, trace_ctx, 0); T0 = data->preempt_timestamp; T1 = ftrace_now(cpu); @@ -527,9 +524,8 @@ probe_wakeup(void *ignore, struct task_struct *p) { struct trace_array_cpu *data; int cpu = smp_processor_id(); - unsigned long flags; long disabled; - int pc; + unsigned int trace_ctx; if (likely(!tracer_enabled)) return; @@ -550,11 +546,12 @@ probe_wakeup(void *ignore, struct task_struct *p) (!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio))) return; - pc = preempt_count(); disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); if (unlikely(disabled != 1)) goto out; + trace_ctx = tracing_gen_ctx(); + /* interrupts should be off from try_to_wake_up */ arch_spin_lock(&wakeup_lock); @@ -581,19 +578,17 @@ probe_wakeup(void *ignore, struct task_struct *p) wakeup_task = get_task_struct(p); - local_save_flags(flags); - data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); data->preempt_timestamp = ftrace_now(cpu); - tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); - __trace_stack(wakeup_trace, flags, 0, pc); + tracing_sched_wakeup_trace(wakeup_trace, p, current, trace_ctx); + __trace_stack(wakeup_trace, trace_ctx, 0); /* * We must be careful in using CALLER_ADDR2. But since wake_up * is not called by an assembly function (where as schedule is) * it should be safe to use it here. 
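A short note on the two context helpers as they appear in the wakeup hunks above: tracing_gen_ctx() samples the current irq/preempt state itself, while tracing_gen_ctx_flags(flags) is used on paths that have already saved the interrupt state they want recorded. A condensed sketch of the pattern (the function name is made up; the calls mirror the diff):

static void example_record(struct trace_array *tr, unsigned long ip,
                           unsigned long parent_ip)
{
        unsigned long flags;
        unsigned int trace_ctx;

        local_irq_save(flags);
        /* context derived from the state we just saved */
        trace_ctx = tracing_gen_ctx_flags(flags);
        trace_function(tr, ip, parent_ip, trace_ctx);
        local_irq_restore(flags);

        /*
         * On paths where interrupts are already known to be off, the
         * diff simply uses trace_ctx = tracing_gen_ctx(); instead.
         */
}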
*/ - __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); + __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, trace_ctx); out_locked: arch_spin_unlock(&wakeup_lock); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index d85a2f0f316b..8bfcd3b09422 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -298,9 +298,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct trace_buffer *buffer; - unsigned long irq_flags; + unsigned int trace_ctx; unsigned long args[6]; - int pc; int syscall_nr; int size; @@ -322,12 +321,11 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - local_save_flags(irq_flags); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->enter_event->event.type, size, irq_flags, pc); + sys_data->enter_event->event.type, size, trace_ctx); if (!event) return; @@ -337,7 +335,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); event_trigger_unlock_commit(trace_file, buffer, event, entry, - irq_flags, pc); + trace_ctx); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) @@ -348,8 +346,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct trace_buffer *buffer; - unsigned long irq_flags; - int pc; + unsigned int trace_ctx; int syscall_nr; syscall_nr = trace_get_syscall_nr(current, regs); @@ -368,13 +365,12 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!sys_data) return; - local_save_flags(irq_flags); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, sys_data->exit_event->event.type, sizeof(*entry), - irq_flags, pc); + trace_ctx); if (!event) return; @@ -383,7 +379,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) entry->ret = syscall_get_return_value(current, regs); event_trigger_unlock_commit(trace_file, buffer, event, entry, - irq_flags, pc); + trace_ctx); } static int reg_event_syscall_enter(struct trace_event_file *file, diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3cf7128e1ad3..9b50869a5ddb 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -34,7 +34,7 @@ struct uprobe_trace_entry_head { #define DATAOF_TRACE_ENTRY(entry, is_return) \ ((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return)) -static int trace_uprobe_create(int argc, const char **argv); +static int trace_uprobe_create(const char *raw_command); static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_uprobe_release(struct dyn_event *ev); static bool trace_uprobe_is_busy(struct dyn_event *ev); @@ -530,7 +530,7 @@ end: * Argument syntax: * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET[%return][(REF)] [FETCHARGS] */ -static int trace_uprobe_create(int argc, const char **argv) +static int __trace_uprobe_create(int argc, const char **argv) { struct trace_uprobe *tu; const char *event = NULL, *group = UPROBE_EVENT_SYSTEM; @@ -716,14 +716,19 @@ fail_address_parse: return ret; } -static int 
create_or_delete_trace_uprobe(int argc, char **argv) +int trace_uprobe_create(const char *raw_command) +{ + return trace_probe_create(raw_command, __trace_uprobe_create); +} + +static int create_or_delete_trace_uprobe(const char *raw_command) { int ret; - if (argv[0][0] == '-') - return dyn_event_release(argc, argv, &trace_uprobe_ops); + if (raw_command[0] == '-') + return dyn_event_release(raw_command, &trace_uprobe_ops); - ret = trace_uprobe_create(argc, (const char **)argv); + ret = trace_uprobe_create(raw_command); return ret == -ECANCELED ? -EINVAL : ret; } @@ -961,7 +966,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; event = trace_event_buffer_lock_reserve(&buffer, trace_file, - call->event.type, size, 0, 0); + call->event.type, size, 0); if (!event) return; @@ -977,7 +982,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, memcpy(data, ucb->buf, tu->tp.size + dsize); - event_trigger_unlock_commit(trace_file, buffer, event, entry, 0, 0); + event_trigger_unlock_commit(trace_file, buffer, event, entry, 0); } /* uprobe handler */ @@ -1635,7 +1640,7 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call) } #endif /* CONFIG_PERF_EVENTS */ -/* Make a trace interface for controling probe points */ +/* Make a trace interface for controlling probe points */ static __init int init_uprobe_trace(void) { int ret; diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 7261fa0f5e3c..9f478d29b926 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -53,6 +53,12 @@ struct tp_probes { struct tracepoint_func probes[]; }; +/* Called in removal of a func but failed to allocate a new tp_funcs */ +static void tp_stub_func(void) +{ + return; +} + static inline void *allocate_probes(int count) { struct tp_probes *p = kmalloc(struct_size(p, probes, count), @@ -130,8 +136,9 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, int prio) { struct tracepoint_func *old, *new; - int nr_probes = 0; - int pos = -1; + int iter_probes; /* Iterate over old probe array. */ + int nr_probes = 0; /* Counter for probes */ + int pos = -1; /* Insertion position into new array */ if (WARN_ON(!tp_func->func)) return ERR_PTR(-EINVAL); @@ -140,13 +147,13 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, old = *funcs; if (old) { /* (N -> N+1), (N != 0, 1) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - /* Insert before probes of lower priority */ - if (pos < 0 && old[nr_probes].prio < prio) - pos = nr_probes; - if (old[nr_probes].func == tp_func->func && - old[nr_probes].data == tp_func->data) + for (iter_probes = 0; old[iter_probes].func; iter_probes++) { + if (old[iter_probes].func == tp_stub_func) + continue; /* Skip stub functions. */ + if (old[iter_probes].func == tp_func->func && + old[iter_probes].data == tp_func->data) return ERR_PTR(-EEXIST); + nr_probes++; } } /* + 2 : one for new probe, one for NULL func */ @@ -154,20 +161,24 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, if (new == NULL) return ERR_PTR(-ENOMEM); if (old) { - if (pos < 0) { - pos = nr_probes; - memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); - } else { - /* Copy higher priority probes ahead of the new probe */ - memcpy(new, old, pos * sizeof(struct tracepoint_func)); - /* Copy the rest after it. 
*/ - memcpy(new + pos + 1, old + pos, - (nr_probes - pos) * sizeof(struct tracepoint_func)); + nr_probes = 0; + for (iter_probes = 0; old[iter_probes].func; iter_probes++) { + if (old[iter_probes].func == tp_stub_func) + continue; + /* Insert before probes of lower priority */ + if (pos < 0 && old[iter_probes].prio < prio) + pos = nr_probes++; + new[nr_probes++] = old[iter_probes]; } - } else + if (pos < 0) + pos = nr_probes++; + /* nr_probes now points to the end of the new array */ + } else { pos = 0; + nr_probes = 1; /* must point at end of array */ + } new[pos] = *tp_func; - new[nr_probes + 1].func = NULL; + new[nr_probes].func = NULL; *funcs = new; debug_print_probes(*funcs); return old; @@ -188,8 +199,9 @@ static void *func_remove(struct tracepoint_func **funcs, /* (N -> M), (N > 1, M >= 0) probes */ if (tp_func->func) { for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - if (old[nr_probes].func == tp_func->func && - old[nr_probes].data == tp_func->data) + if ((old[nr_probes].func == tp_func->func && + old[nr_probes].data == tp_func->data) || + old[nr_probes].func == tp_stub_func) nr_del++; } } @@ -208,14 +220,27 @@ static void *func_remove(struct tracepoint_func **funcs, /* N -> M, (N > 1, M > 0) */ /* + 1 for NULL */ new = allocate_probes(nr_probes - nr_del + 1); - if (new == NULL) - return ERR_PTR(-ENOMEM); - for (i = 0; old[i].func; i++) - if (old[i].func != tp_func->func - || old[i].data != tp_func->data) - new[j++] = old[i]; - new[nr_probes - nr_del].func = NULL; - *funcs = new; + if (new) { + for (i = 0; old[i].func; i++) { + if ((old[i].func != tp_func->func || + old[i].data != tp_func->data) && + old[i].func != tp_stub_func) + new[j++] = old[i]; + } + new[nr_probes - nr_del].func = NULL; + *funcs = new; + } else { + /* + * Failed to allocate, replace the old function + * with calls to tp_stub_func. + */ + for (i = 0; old[i].func; i++) { + if (old[i].func == tp_func->func && + old[i].data == tp_func->data) + WRITE_ONCE(old[i].func, tp_stub_func); + } + *funcs = old; + } } debug_print_probes(*funcs); return old; @@ -295,10 +320,12 @@ static int tracepoint_remove_func(struct tracepoint *tp, tp_funcs = rcu_dereference_protected(tp->funcs, lockdep_is_held(&tracepoints_mutex)); old = func_remove(&tp_funcs, func); - if (IS_ERR(old)) { - WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM); + if (WARN_ON_ONCE(IS_ERR(old))) return PTR_ERR(old); - } + + if (tp_funcs == old) + /* Failed allocating new tp_funcs, replaced func with stub */ + return 0; if (!tp_funcs) { /* Removed last function */ diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 0ef8f65bd2d7..9c9eb20dd2c5 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -413,7 +413,7 @@ static void put_watch(struct watch *watch) } /** - * init_watch_queue - Initialise a watch + * init_watch - Initialise a watch * @watch: The watch to initialise. * @wqueue: The queue to assign. * diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 894bb885b40b..0d150da252e8 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2964,8 +2964,8 @@ reflush: if (++flush_cnt == 10 || (flush_cnt % 100 == 0 && flush_cnt <= 1000)) - pr_warn("workqueue %s: drain_workqueue() isn't complete after %u tries\n", - wq->name, flush_cnt); + pr_warn("workqueue %s: %s() isn't complete after %u tries\n", + wq->name, __func__, flush_cnt); mutex_unlock(&wq->mutex); goto reflush; |
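To make the tracepoint fallback above concrete, here is an illustrative view of the probe array when func_remove() fails to allocate a smaller copy (entries are invented; only the .func pointer of the removed probe is overwritten, its .data stays in place):

/*
 * Before removing f2, with allocate_probes() failing:
 *
 *     { {f1, d1}, {f2, d2}, {f3, d3}, {NULL} }
 *
 * Instead of returning -ENOMEM, f2's slot is stubbed out in place:
 *
 *     { {f1, d1}, {tp_stub_func, d2}, {f3, d3}, {NULL} }
 *
 * The iter_probes loops in func_add() skip tp_stub_func entries when
 * counting and copying, so the dead slot is dropped the next time the
 * array is successfully reallocated, and tracepoint_remove_func()
 * treats "tp_funcs == old" as success rather than an error.
 */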