From 80ffbaa5b1bd98e80e3239a3b8cfda2da433009a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 3 Sep 2018 06:09:34 -0600 Subject: kallsyms: reduce size a little on 64-bit Both kallsyms_num_syms and kallsyms_markers[] don't really need to use unsigned long as their (base) types; unsigned int fully suffices. Signed-off-by: Jan Beulich Signed-off-by: Masahiro Yamada --- kernel/kallsyms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 02a0b01380d8..f3a04994e063 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -37,7 +37,7 @@ extern const u8 kallsyms_names[] __weak; * Tell the compiler that the count isn't in the small data section if the arch * has one (eg: FRV). */ -extern const unsigned long kallsyms_num_syms +extern const unsigned int kallsyms_num_syms __attribute__((weak, section(".rodata"))); extern const unsigned long kallsyms_relative_base @@ -46,7 +46,7 @@ __attribute__((weak, section(".rodata"))); extern const u8 kallsyms_token_table[] __weak; extern const u16 kallsyms_token_index[] __weak; -extern const unsigned long kallsyms_markers[] __weak; +extern const unsigned int kallsyms_markers[] __weak; /* * Expand a compressed symbol data into the resulting uncompressed string, -- cgit v1.2.3 From d6b183eda466415bb5defcf9afe4cb64734839e8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 24 Aug 2018 16:20:28 -0400 Subject: tracing/kprobe: Remove unneeded extra strchr() from create_trace_kprobe() By utilizing a temporary variable, we can avoid adding another call to strchr(). Instead, save the first call to a temp variable, and then use that variable as the reference to set the event variable. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c30032367aab..508396edc56a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -749,10 +749,13 @@ static int create_trace_kprobe(int argc, char **argv) } if (event) { - if (strchr(event, '/')) { + char *slash; + + slash = strchr(event, '/'); + if (slash) { group = event; - event = strchr(group, '/') + 1; - event[-1] = '\0'; + event = slash + 1; + slash[0] = '\0'; if (strlen(group) == 0) { pr_info("Group name is not specified\n"); return -EINVAL; -- cgit v1.2.3 From 1cc33161a83d20b5462b1e93f95d3ce6388079ee Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 20 Aug 2018 10:12:47 +0530 Subject: uprobes: Support SDT markers having reference count (semaphore) Userspace Statically Defined Tracepoints[1] are dtrace style markers inside userspace applications. Applications like PostgreSQL, MySQL, Pthread, Perl, Python, Java, Ruby, Node.js, libvirt, QEMU, glib etc have these markers embedded in them. These markers are added by developer at important places in the code. Each marker source expands to a single nop instruction in the compiled code but there may be additional overhead for computing the marker arguments which expands to couple of instructions. In case the overhead is more, execution of it can be omitted by runtime if() condition when no one is tracing on the marker: if (reference_counter > 0) { Execute marker instructions; } Default value of reference counter is 0. Tracer has to increment the reference counter before tracing on a marker and decrement it when done with the tracing. Implement the reference counter logic in core uprobe. User will be able to use it from trace_uprobe as well as from kernel module. New trace_uprobe definition with reference counter will now be: :[(ref_ctr_offset)] where ref_ctr_offset is an optional field. For kernel module, new variant of uprobe_register() has been introduced: uprobe_register_refctr(inode, offset, ref_ctr_offset, consumer) No new variant for uprobe_unregister() because it's assumed to have only one reference counter for one uprobe. [1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation Note: 'reference counter' is called as 'semaphore' in original Dtrace (or Systemtap, bcc and even in ELF) documentation and code. But the term 'semaphore' is misleading in this context. This is just a counter used to hold number of tracers tracing on a marker. This is not really used for any synchronization. So we are calling it a 'reference counter' in kernel / perf code. Link: http://lkml.kernel.org/r/20180820044250.11659-2-ravi.bangoria@linux.ibm.com Reviewed-by: Masami Hiramatsu [Only trace_uprobe.c] Reviewed-by: Oleg Nesterov Reviewed-by: Song Liu Tested-by: Song Liu Signed-off-by: Ravi Bangoria Signed-off-by: Steven Rostedt (VMware) --- include/linux/uprobes.h | 5 + kernel/events/uprobes.c | 259 ++++++++++++++++++++++++++++++++++++++++++-- kernel/trace/trace.c | 2 +- kernel/trace/trace_uprobe.c | 38 ++++++- 4 files changed, 293 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index bb9d2084af03..103a48a48872 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -123,6 +123,7 @@ extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); @@ -160,6 +161,10 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { return -ENOSYS; } +static inline int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) +{ + return -ENOSYS; +} static inline int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 3207a4d26849..934feb39f6be 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -73,6 +73,7 @@ struct uprobe { struct uprobe_consumer *consumers; struct inode *inode; /* Also hold a ref to inode */ loff_t offset; + loff_t ref_ctr_offset; unsigned long flags; /* @@ -88,6 +89,15 @@ struct uprobe { struct arch_uprobe arch; }; +struct delayed_uprobe { + struct list_head list; + struct uprobe *uprobe; + struct mm_struct *mm; +}; + +static DEFINE_MUTEX(delayed_uprobe_lock); +static LIST_HEAD(delayed_uprobe_list); + /* * Execute out of line area: anonymous executable mapping installed * by the probed task to execute the copy of the original instruction @@ -282,6 +292,166 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t return 1; } +static struct delayed_uprobe * +delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct delayed_uprobe *du; + + list_for_each_entry(du, &delayed_uprobe_list, list) + if (du->uprobe == uprobe && du->mm == mm) + return du; + return NULL; +} + +static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct delayed_uprobe *du; + + if (delayed_uprobe_check(uprobe, mm)) + return 0; + + du = kzalloc(sizeof(*du), GFP_KERNEL); + if (!du) + return -ENOMEM; + + du->uprobe = uprobe; + du->mm = mm; + list_add(&du->list, &delayed_uprobe_list); + return 0; +} + +static void delayed_uprobe_delete(struct delayed_uprobe *du) +{ + if (WARN_ON(!du)) + return; + list_del(&du->list); + kfree(du); +} + +static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct list_head *pos, *q; + struct delayed_uprobe *du; + + if (!uprobe && !mm) + return; + + list_for_each_safe(pos, q, &delayed_uprobe_list) { + du = list_entry(pos, struct delayed_uprobe, list); + + if (uprobe && du->uprobe != uprobe) + continue; + if (mm && du->mm != mm) + continue; + + delayed_uprobe_delete(du); + } +} + +static bool valid_ref_ctr_vma(struct uprobe *uprobe, + struct vm_area_struct *vma) +{ + unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset); + + return uprobe->ref_ctr_offset && + vma->vm_file && + file_inode(vma->vm_file) == uprobe->inode && + (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && + vma->vm_start <= vaddr && + vma->vm_end > vaddr; +} + +static struct vm_area_struct * +find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct vm_area_struct *tmp; + + for (tmp = mm->mmap; tmp; tmp = tmp->vm_next) + if (valid_ref_ctr_vma(uprobe, tmp)) + return tmp; + + return NULL; +} + +static int +__update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d) +{ + void *kaddr; + struct page *page; + struct vm_area_struct *vma; + int ret; + short *ptr; + + if (!vaddr || !d) + return -EINVAL; + + ret = get_user_pages_remote(NULL, mm, vaddr, 1, + FOLL_WRITE, &page, &vma, NULL); + if (unlikely(ret <= 0)) { + /* + * We are asking for 1 page. If get_user_pages_remote() fails, + * it may return 0, in that case we have to return error. + */ + return ret == 0 ? -EBUSY : ret; + } + + kaddr = kmap_atomic(page); + ptr = kaddr + (vaddr & ~PAGE_MASK); + + if (unlikely(*ptr + d < 0)) { + pr_warn("ref_ctr going negative. vaddr: 0x%lx, " + "curr val: %d, delta: %d\n", vaddr, *ptr, d); + ret = -EINVAL; + goto out; + } + + *ptr += d; + ret = 0; +out: + kunmap_atomic(kaddr); + put_page(page); + return ret; +} + +static void update_ref_ctr_warn(struct uprobe *uprobe, + struct mm_struct *mm, short d) +{ + pr_warn("ref_ctr %s failed for inode: 0x%lx offset: " + "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n", + d > 0 ? "increment" : "decrement", uprobe->inode->i_ino, + (unsigned long long) uprobe->offset, + (unsigned long long) uprobe->ref_ctr_offset, mm); +} + +static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm, + short d) +{ + struct vm_area_struct *rc_vma; + unsigned long rc_vaddr; + int ret = 0; + + rc_vma = find_ref_ctr_vma(uprobe, mm); + + if (rc_vma) { + rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset); + ret = __update_ref_ctr(mm, rc_vaddr, d); + if (ret) + update_ref_ctr_warn(uprobe, mm, d); + + if (d > 0) + return ret; + } + + mutex_lock(&delayed_uprobe_lock); + if (d > 0) + ret = delayed_uprobe_add(uprobe, mm); + else + delayed_uprobe_remove(uprobe, mm); + mutex_unlock(&delayed_uprobe_lock); + + return ret; +} + /* * NOTE: * Expect the breakpoint instruction to be the smallest size instruction for @@ -302,9 +472,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { + struct uprobe *uprobe; struct page *old_page, *new_page; struct vm_area_struct *vma; - int ret; + int ret, is_register, ref_ctr_updated = 0; + + is_register = is_swbp_insn(&opcode); + uprobe = container_of(auprobe, struct uprobe, arch); retry: /* Read the page with vaddr into memory */ @@ -317,6 +491,15 @@ retry: if (ret <= 0) goto put_old; + /* We are going to replace instruction, update ref_ctr. */ + if (!ref_ctr_updated && uprobe->ref_ctr_offset) { + ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1); + if (ret) + goto put_old; + + ref_ctr_updated = 1; + } + ret = anon_vma_prepare(vma); if (ret) goto put_old; @@ -337,6 +520,11 @@ put_old: if (unlikely(ret == -EAGAIN)) goto retry; + + /* Revert back reference counter if instruction update failed. */ + if (ret && is_register && ref_ctr_updated) + update_ref_ctr(uprobe, mm, -1); + return ret; } @@ -378,8 +566,15 @@ static struct uprobe *get_uprobe(struct uprobe *uprobe) static void put_uprobe(struct uprobe *uprobe) { - if (atomic_dec_and_test(&uprobe->ref)) + if (atomic_dec_and_test(&uprobe->ref)) { + /* + * If application munmap(exec_vma) before uprobe_unregister() + * gets called, we don't get a chance to remove uprobe from + * delayed_uprobe_list from remove_breakpoint(). Do it here. + */ + delayed_uprobe_remove(uprobe, NULL); kfree(uprobe); + } } static int match_uprobe(struct uprobe *l, struct uprobe *r) @@ -484,7 +679,8 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe) return u; } -static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) +static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, + loff_t ref_ctr_offset) { struct uprobe *uprobe, *cur_uprobe; @@ -494,6 +690,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) uprobe->inode = inode; uprobe->offset = offset; + uprobe->ref_ctr_offset = ref_ctr_offset; init_rwsem(&uprobe->register_rwsem); init_rwsem(&uprobe->consumer_rwsem); @@ -895,7 +1092,7 @@ EXPORT_SYMBOL_GPL(uprobe_unregister); * else return 0 (success) */ static int __uprobe_register(struct inode *inode, loff_t offset, - struct uprobe_consumer *uc) + loff_t ref_ctr_offset, struct uprobe_consumer *uc) { struct uprobe *uprobe; int ret; @@ -912,7 +1109,7 @@ static int __uprobe_register(struct inode *inode, loff_t offset, return -EINVAL; retry: - uprobe = alloc_uprobe(inode, offset); + uprobe = alloc_uprobe(inode, offset, ref_ctr_offset); if (!uprobe) return -ENOMEM; /* @@ -938,10 +1135,17 @@ static int __uprobe_register(struct inode *inode, loff_t offset, int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { - return __uprobe_register(inode, offset, uc); + return __uprobe_register(inode, offset, 0, uc); } EXPORT_SYMBOL_GPL(uprobe_register); +int uprobe_register_refctr(struct inode *inode, loff_t offset, + loff_t ref_ctr_offset, struct uprobe_consumer *uc) +{ + return __uprobe_register(inode, offset, ref_ctr_offset, uc); +} +EXPORT_SYMBOL_GPL(uprobe_register_refctr); + /* * uprobe_apply - unregister an already registered probe. * @inode: the file in which the probe has to be removed. @@ -1060,6 +1264,35 @@ static void build_probe_list(struct inode *inode, spin_unlock(&uprobes_treelock); } +/* @vma contains reference counter, not the probed instruction. */ +static int delayed_ref_ctr_inc(struct vm_area_struct *vma) +{ + struct list_head *pos, *q; + struct delayed_uprobe *du; + unsigned long vaddr; + int ret = 0, err = 0; + + mutex_lock(&delayed_uprobe_lock); + list_for_each_safe(pos, q, &delayed_uprobe_list) { + du = list_entry(pos, struct delayed_uprobe, list); + + if (du->mm != vma->vm_mm || + !valid_ref_ctr_vma(du->uprobe, vma)) + continue; + + vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset); + ret = __update_ref_ctr(vma->vm_mm, vaddr, 1); + if (ret) { + update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1); + if (!err) + err = ret; + } + delayed_uprobe_delete(du); + } + mutex_unlock(&delayed_uprobe_lock); + return err; +} + /* * Called from mmap_region/vma_adjust with mm->mmap_sem acquired. * @@ -1072,7 +1305,15 @@ int uprobe_mmap(struct vm_area_struct *vma) struct uprobe *uprobe, *u; struct inode *inode; - if (no_uprobe_events() || !valid_vma(vma, true)) + if (no_uprobe_events()) + return 0; + + if (vma->vm_file && + (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && + test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags)) + delayed_ref_ctr_inc(vma); + + if (!valid_vma(vma, true)) return 0; inode = file_inode(vma->vm_file); @@ -1246,6 +1487,10 @@ void uprobe_clear_state(struct mm_struct *mm) { struct xol_area *area = mm->uprobes_state.xol_area; + mutex_lock(&delayed_uprobe_lock); + delayed_uprobe_remove(NULL, mm); + mutex_unlock(&delayed_uprobe_lock); + if (!area) return; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bf6f1d70484d..147be8523560 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4621,7 +4621,7 @@ static const char readme_msg[] = "place (kretprobe): [:][+]|\n" #endif #ifdef CONFIG_UPROBE_EVENTS - "\t place: :\n" + " place (uprobe): :[(ref_ctr_offset)]\n" #endif "\t args: =fetcharg[:type]\n" "\t fetcharg: %, @
, @[+|-],\n" diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e696667da29a..7b85172beab6 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -47,6 +47,7 @@ struct trace_uprobe { struct inode *inode; char *filename; unsigned long offset; + unsigned long ref_ctr_offset; unsigned long nhit; struct trace_probe tp; }; @@ -352,10 +353,10 @@ end: static int create_trace_uprobe(int argc, char **argv) { struct trace_uprobe *tu; - char *arg, *event, *group, *filename; + char *arg, *event, *group, *filename, *rctr, *rctr_end; char buf[MAX_EVENT_NAME_LEN]; struct path path; - unsigned long offset; + unsigned long offset, ref_ctr_offset; bool is_delete, is_return; int i, ret; @@ -364,6 +365,7 @@ static int create_trace_uprobe(int argc, char **argv) is_return = false; event = NULL; group = NULL; + ref_ctr_offset = 0; /* argc must be >= 1 */ if (argv[0][0] == '-') @@ -438,6 +440,26 @@ static int create_trace_uprobe(int argc, char **argv) goto fail_address_parse; } + /* Parse reference counter offset if specified. */ + rctr = strchr(arg, '('); + if (rctr) { + rctr_end = strchr(rctr, ')'); + if (rctr > rctr_end || *(rctr_end + 1) != 0) { + ret = -EINVAL; + pr_info("Invalid reference counter offset.\n"); + goto fail_address_parse; + } + + *rctr++ = '\0'; + *rctr_end = '\0'; + ret = kstrtoul(rctr, 0, &ref_ctr_offset); + if (ret) { + pr_info("Invalid reference counter offset.\n"); + goto fail_address_parse; + } + } + + /* Parse uprobe offset. */ ret = kstrtoul(arg, 0, &offset); if (ret) goto fail_address_parse; @@ -472,6 +494,7 @@ static int create_trace_uprobe(int argc, char **argv) goto fail_address_parse; } tu->offset = offset; + tu->ref_ctr_offset = ref_ctr_offset; tu->path = path; tu->filename = kstrdup(filename, GFP_KERNEL); @@ -590,6 +613,9 @@ static int probes_seq_show(struct seq_file *m, void *v) trace_event_name(&tu->tp.call), tu->filename, (int)(sizeof(void *) * 2), tu->offset); + if (tu->ref_ctr_offset) + seq_printf(m, "(0x%lx)", tu->ref_ctr_offset); + for (i = 0; i < tu->tp.nr_args; i++) seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm); @@ -905,7 +931,13 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, tu->consumer.filter = filter; tu->inode = d_real_inode(tu->path.dentry); - ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + if (tu->ref_ctr_offset) { + ret = uprobe_register_refctr(tu->inode, tu->offset, + tu->ref_ctr_offset, &tu->consumer); + } else { + ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + } + if (ret) goto err_buffer; -- cgit v1.2.3 From 22bad38286d9a652d7061a02f9743bb2ebb84e59 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 20 Aug 2018 10:12:48 +0530 Subject: uprobes/sdt: Prevent multiple reference counter for same uprobe We assume to have only one reference counter for one uprobe. Don't allow user to register multiple uprobes having same inode+offset but different reference counter. Link: http://lkml.kernel.org/r/20180820044250.11659-3-ravi.bangoria@linux.ibm.com Acked-by: Srikar Dronamraju Reviewed-by: Oleg Nesterov Reviewed-by: Song Liu Tested-by: Song Liu Signed-off-by: Ravi Bangoria Signed-off-by: Steven Rostedt (VMware) --- kernel/events/uprobes.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 934feb39f6be..96fb51f3994f 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -679,6 +679,16 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe) return u; } +static void +ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe) +{ + pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx " + "ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n", + uprobe->inode->i_ino, (unsigned long long) uprobe->offset, + (unsigned long long) cur_uprobe->ref_ctr_offset, + (unsigned long long) uprobe->ref_ctr_offset); +} + static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, loff_t ref_ctr_offset) { @@ -698,6 +708,12 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, cur_uprobe = insert_uprobe(uprobe); /* a uprobe exists for this inode:offset combination */ if (cur_uprobe) { + if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) { + ref_ctr_mismatch_warn(cur_uprobe, uprobe); + put_uprobe(cur_uprobe); + kfree(uprobe); + return ERR_PTR(-EINVAL); + } kfree(uprobe); uprobe = cur_uprobe; } @@ -1112,6 +1128,9 @@ static int __uprobe_register(struct inode *inode, loff_t offset, uprobe = alloc_uprobe(inode, offset, ref_ctr_offset); if (!uprobe) return -ENOMEM; + if (IS_ERR(uprobe)) + return PTR_ERR(uprobe); + /* * We can race with uprobe_unregister()->delete_uprobe(). * Check uprobe_is_active() and retry if it is false. -- cgit v1.2.3 From ccea8727dc27d8f46df6557f9260ab32760ef409 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 20 Aug 2018 10:12:49 +0530 Subject: trace_uprobe/sdt: Prevent multiple reference counter for same uprobe We assume to have only one reference counter for one uprobe. Don't allow user to add multiple trace_uprobe entries having same inode+offset but different reference counter. Ex, # echo "p:sdt_tick/loop2 /home/ravi/tick:0x6e4(0x10036)" > uprobe_events # echo "p:sdt_tick/loop2_1 /home/ravi/tick:0x6e4(0xfffff)" >> uprobe_events bash: echo: write error: Invalid argument # dmesg trace_kprobe: Reference counter offset mismatch. There is one exception though: When user is trying to replace the old entry with the new one, we allow this if the new entry does not conflict with any other existing entries. Link: http://lkml.kernel.org/r/20180820044250.11659-4-ravi.bangoria@linux.ibm.com Acked-by: Srikar Dronamraju Reviewed-by: Song Liu Reviewed-by: Oleg Nesterov Tested-by: Song Liu Signed-off-by: Ravi Bangoria Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_uprobe.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 7b85172beab6..3a7c73c40007 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -312,6 +312,35 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu) return 0; } +/* + * Uprobe with multiple reference counter is not allowed. i.e. + * If inode and offset matches, reference counter offset *must* + * match as well. Though, there is one exception: If user is + * replacing old trace_uprobe with new one(same group/event), + * then we allow same uprobe with new reference counter as far + * as the new one does not conflict with any other existing + * ones. + */ +static struct trace_uprobe *find_old_trace_uprobe(struct trace_uprobe *new) +{ + struct trace_uprobe *tmp, *old = NULL; + struct inode *new_inode = d_real_inode(new->path.dentry); + + old = find_probe_event(trace_event_name(&new->tp.call), + new->tp.call.class->system); + + list_for_each_entry(tmp, &uprobe_list, list) { + if ((old ? old != tmp : true) && + new_inode == d_real_inode(tmp->path.dentry) && + new->offset == tmp->offset && + new->ref_ctr_offset != tmp->ref_ctr_offset) { + pr_warn("Reference counter offset mismatch."); + return ERR_PTR(-EINVAL); + } + } + return old; +} + /* Register a trace_uprobe and probe_event */ static int register_trace_uprobe(struct trace_uprobe *tu) { @@ -321,8 +350,12 @@ static int register_trace_uprobe(struct trace_uprobe *tu) mutex_lock(&uprobe_lock); /* register as an event */ - old_tu = find_probe_event(trace_event_name(&tu->tp.call), - tu->tp.call.class->system); + old_tu = find_old_trace_uprobe(tu); + if (IS_ERR(old_tu)) { + ret = PTR_ERR(old_tu); + goto end; + } + if (old_tu) { /* delete old event */ ret = unregister_trace_uprobe(old_tu); -- cgit v1.2.3 From a6ca88b241d5e929e6e60b12ad8cd288f0ffa256 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 1 Oct 2018 22:36:36 -0700 Subject: trace_uprobe: support reference counter in fd-based uprobe This patch enables uprobes with reference counter in fd-based uprobe. Highest 32 bits of perf_event_attr.config is used to stored offset of the reference count (semaphore). Format information in /sys/bus/event_source/devices/uprobe/format/ is updated to reflect this new feature. Link: http://lkml.kernel.org/r/20181002053636.1896903-1-songliubraving@fb.com Cc: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) Reviewed-and-tested-by: Ravi Bangoria Signed-off-by: Song Liu Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 3 ++- kernel/events/core.c | 49 ++++++++++++++++++++++++++++++++--------- kernel/trace/trace_event_perf.c | 7 +++--- kernel/trace/trace_probe.h | 3 ++- kernel/trace/trace_uprobe.c | 4 +++- 5 files changed, 50 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 78a010e19ed4..4130a5497d40 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -575,7 +575,8 @@ extern int bpf_get_kprobe_info(const struct perf_event *event, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS -extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe); +extern int perf_uprobe_init(struct perf_event *event, + unsigned long ref_ctr_offset, bool is_retprobe); extern void perf_uprobe_destroy(struct perf_event *event); extern int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, const char **filename, diff --git a/kernel/events/core.c b/kernel/events/core.c index c80549bf82c6..65b30773af3e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8368,30 +8368,39 @@ static struct pmu perf_tracepoint = { * * PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe * if not set, create kprobe/uprobe + * + * The following values specify a reference counter (or semaphore in the + * terminology of tools like dtrace, systemtap, etc.) Userspace Statically + * Defined Tracepoints (USDT). Currently, we use 40 bit for the offset. + * + * PERF_UPROBE_REF_CTR_OFFSET_BITS # of bits in config as th offset + * PERF_UPROBE_REF_CTR_OFFSET_SHIFT # of bits to shift left */ enum perf_probe_config { PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0, /* [k,u]retprobe */ + PERF_UPROBE_REF_CTR_OFFSET_BITS = 32, + PERF_UPROBE_REF_CTR_OFFSET_SHIFT = 64 - PERF_UPROBE_REF_CTR_OFFSET_BITS, }; PMU_FORMAT_ATTR(retprobe, "config:0"); +#endif -static struct attribute *probe_attrs[] = { +#ifdef CONFIG_KPROBE_EVENTS +static struct attribute *kprobe_attrs[] = { &format_attr_retprobe.attr, NULL, }; -static struct attribute_group probe_format_group = { +static struct attribute_group kprobe_format_group = { .name = "format", - .attrs = probe_attrs, + .attrs = kprobe_attrs, }; -static const struct attribute_group *probe_attr_groups[] = { - &probe_format_group, +static const struct attribute_group *kprobe_attr_groups[] = { + &kprobe_format_group, NULL, }; -#endif -#ifdef CONFIG_KPROBE_EVENTS static int perf_kprobe_event_init(struct perf_event *event); static struct pmu perf_kprobe = { .task_ctx_nr = perf_sw_context, @@ -8401,7 +8410,7 @@ static struct pmu perf_kprobe = { .start = perf_swevent_start, .stop = perf_swevent_stop, .read = perf_swevent_read, - .attr_groups = probe_attr_groups, + .attr_groups = kprobe_attr_groups, }; static int perf_kprobe_event_init(struct perf_event *event) @@ -8433,6 +8442,24 @@ static int perf_kprobe_event_init(struct perf_event *event) #endif /* CONFIG_KPROBE_EVENTS */ #ifdef CONFIG_UPROBE_EVENTS +PMU_FORMAT_ATTR(ref_ctr_offset, "config:32-63"); + +static struct attribute *uprobe_attrs[] = { + &format_attr_retprobe.attr, + &format_attr_ref_ctr_offset.attr, + NULL, +}; + +static struct attribute_group uprobe_format_group = { + .name = "format", + .attrs = uprobe_attrs, +}; + +static const struct attribute_group *uprobe_attr_groups[] = { + &uprobe_format_group, + NULL, +}; + static int perf_uprobe_event_init(struct perf_event *event); static struct pmu perf_uprobe = { .task_ctx_nr = perf_sw_context, @@ -8442,12 +8469,13 @@ static struct pmu perf_uprobe = { .start = perf_swevent_start, .stop = perf_swevent_stop, .read = perf_swevent_read, - .attr_groups = probe_attr_groups, + .attr_groups = uprobe_attr_groups, }; static int perf_uprobe_event_init(struct perf_event *event) { int err; + unsigned long ref_ctr_offset; bool is_retprobe; if (event->attr.type != perf_uprobe.type) @@ -8463,7 +8491,8 @@ static int perf_uprobe_event_init(struct perf_event *event) return -EOPNOTSUPP; is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE; - err = perf_uprobe_init(event, is_retprobe); + ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT; + err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe); if (err) return err; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 69a3fe926e8c..76217bbef815 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -290,7 +290,8 @@ void perf_kprobe_destroy(struct perf_event *p_event) #endif /* CONFIG_KPROBE_EVENTS */ #ifdef CONFIG_UPROBE_EVENTS -int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe) +int perf_uprobe_init(struct perf_event *p_event, + unsigned long ref_ctr_offset, bool is_retprobe) { int ret; char *path = NULL; @@ -312,8 +313,8 @@ int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe) goto out; } - tp_event = create_local_trace_uprobe( - path, p_event->attr.probe_offset, is_retprobe); + tp_event = create_local_trace_uprobe(path, p_event->attr.probe_offset, + ref_ctr_offset, is_retprobe); if (IS_ERR(tp_event)) { ret = PTR_ERR(tp_event); goto out; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 5f52668e165d..03b10f3201a5 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -412,6 +412,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs, extern void destroy_local_trace_kprobe(struct trace_event_call *event_call); extern struct trace_event_call * -create_local_trace_uprobe(char *name, unsigned long offs, bool is_return); +create_local_trace_uprobe(char *name, unsigned long offs, + unsigned long ref_ctr_offset, bool is_return); extern void destroy_local_trace_uprobe(struct trace_event_call *event_call); #endif diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3a7c73c40007..d09638706fe0 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1405,7 +1405,8 @@ static int unregister_uprobe_event(struct trace_uprobe *tu) #ifdef CONFIG_PERF_EVENTS struct trace_event_call * -create_local_trace_uprobe(char *name, unsigned long offs, bool is_return) +create_local_trace_uprobe(char *name, unsigned long offs, + unsigned long ref_ctr_offset, bool is_return) { struct trace_uprobe *tu; struct path path; @@ -1437,6 +1438,7 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return) tu->offset = offs; tu->path = path; + tu->ref_ctr_offset = ref_ctr_offset; tu->filename = kstrdup(name, GFP_KERNEL); init_trace_event_call(tu, &tu->tp.call); -- cgit v1.2.3 From 56de763052792669d61d79a087611da9a7f04d4e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:16:36 +0900 Subject: tracing: probeevent: Cleanup print argument functions Cleanup the print-argument function to decouple it into print-name and print-value, so that it can support more flexible expression, like array type. Link: http://lkml.kernel.org/r/152465859635.26224.13452846788717102315.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 20 ++++++-------------- kernel/trace/trace_probe.c | 12 +++++------- kernel/trace/trace_probe.h | 19 ++++++++++++++++--- kernel/trace/trace_uprobe.c | 9 ++------- 4 files changed, 29 insertions(+), 31 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 508396edc56a..6326c71181aa 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1136,8 +1136,6 @@ print_kprobe_event(struct trace_iterator *iter, int flags, struct kprobe_trace_entry_head *field; struct trace_seq *s = &iter->seq; struct trace_probe *tp; - u8 *data; - int i; field = (struct kprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); @@ -1149,11 +1147,9 @@ print_kprobe_event(struct trace_iterator *iter, int flags, trace_seq_putc(s, ')'); - data = (u8 *)&field[1]; - for (i = 0; i < tp->nr_args; i++) - if (!tp->args[i].type->print(s, tp->args[i].name, - data + tp->args[i].offset, field)) - goto out; + if (print_probe_args(s, tp->args, tp->nr_args, + (u8 *)&field[1], field) < 0) + goto out; trace_seq_putc(s, '\n'); out: @@ -1167,8 +1163,6 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, struct kretprobe_trace_entry_head *field; struct trace_seq *s = &iter->seq; struct trace_probe *tp; - u8 *data; - int i; field = (struct kretprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); @@ -1185,11 +1179,9 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, trace_seq_putc(s, ')'); - data = (u8 *)&field[1]; - for (i = 0; i < tp->nr_args; i++) - if (!tp->args[i].type->print(s, tp->args[i].name, - data + tp->args[i].offset, field)) - goto out; + if (print_probe_args(s, tp->args, tp->nr_args, + (u8 *)&field[1], field) < 0) + goto out; trace_seq_putc(s, '\n'); diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index e99c3ce7aa65..e2c184eaa7db 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -26,10 +26,9 @@ const char *reserved_field_names[] = { /* Printing in basic type function template */ #define DEFINE_BASIC_PRINT_TYPE_FUNC(tname, type, fmt) \ -int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, const char *name, \ - void *data, void *ent) \ +int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, void *data, void *ent)\ { \ - trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \ + trace_seq_printf(s, fmt, *(type *)data); \ return !trace_seq_has_overflowed(s); \ } \ const char PRINT_TYPE_FMT_NAME(tname)[] = fmt; \ @@ -49,15 +48,14 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(x32, u32, "0x%x") DEFINE_BASIC_PRINT_TYPE_FUNC(x64, u64, "0x%Lx") /* Print type function for string type */ -int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name, - void *data, void *ent) +int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent) { int len = *(u32 *)data >> 16; if (!len) - trace_seq_printf(s, " %s=(fault)", name); + trace_seq_puts(s, "(fault)"); else - trace_seq_printf(s, " %s=\"%s\"", name, + trace_seq_printf(s, "\"%s\"", (const char *)get_loc_data(data, ent)); return !trace_seq_has_overflowed(s); } diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 03b10f3201a5..8254a061ac35 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -82,7 +82,7 @@ static nokprobe_inline void *get_loc_data(u32 *dl, void *ent) /* Data fetch function type */ typedef void (*fetch_func_t)(struct pt_regs *, void *, void *); /* Printing function type */ -typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, void *); +typedef int (*print_type_func_t)(struct trace_seq *, void *, void *); /* Fetch types */ enum { @@ -124,8 +124,7 @@ typedef u32 string_size; /* Printing in basic type function template */ #define DECLARE_BASIC_PRINT_TYPE_FUNC(type) \ -int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \ - void *data, void *ent); \ +int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, void *data, void *ent);\ extern const char PRINT_TYPE_FMT_NAME(type)[] DECLARE_BASIC_PRINT_TYPE_FUNC(u8); @@ -403,6 +402,20 @@ store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs, } } +static inline int +print_probe_args(struct trace_seq *s, struct probe_arg *args, int nr_args, + u8 *data, void *field) +{ + int i; + + for (i = 0; i < nr_args; i++) { + trace_seq_printf(s, " %s=", args[i].name); + if (!args[i].type->print(s, data + args[i].offset, field)) + return -ENOMEM; + } + return 0; +} + extern int set_print_fmt(struct trace_probe *tp, bool is_return); #ifdef CONFIG_PERF_EVENTS diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d09638706fe0..c55753e1079e 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -892,7 +892,6 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e struct trace_seq *s = &iter->seq; struct trace_uprobe *tu; u8 *data; - int i; entry = (struct uprobe_trace_entry_head *)iter->ent; tu = container_of(event, struct trace_uprobe, tp.call.event); @@ -909,12 +908,8 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i < tu->tp.nr_args; i++) { - struct probe_arg *parg = &tu->tp.args[i]; - - if (!parg->type->print(s, parg->name, data + parg->offset, entry)) - goto out; - } + if (print_probe_args(s, tu->tp.args, tu->tp.nr_args, data, entry) < 0) + goto out; trace_seq_putc(s, '\n'); -- cgit v1.2.3 From eeb07b0615004bce145015b704de85fd3ac6cce0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:17:05 +0900 Subject: tracing: probeevent: Cleanup argument field definition Cleanup event argument definition code in one place for maintenancability. Link: http://lkml.kernel.org/r/152465862529.26224.9068605421476018902.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 32 ++++---------------------------- kernel/trace/trace_probe.c | 21 +++++++++++++++++++++ kernel/trace/trace_probe.h | 2 ++ kernel/trace/trace_uprobe.c | 15 ++------------- 4 files changed, 29 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6326c71181aa..1356927e32d0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1192,49 +1192,25 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, static int kprobe_event_define_fields(struct trace_event_call *event_call) { - int ret, i; + int ret; struct kprobe_trace_entry_head field; struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data; DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); - /* Set argument names as fields */ - for (i = 0; i < tk->tp.nr_args; i++) { - struct probe_arg *parg = &tk->tp.args[i]; - ret = trace_define_field(event_call, parg->type->fmttype, - parg->name, - sizeof(field) + parg->offset, - parg->type->size, - parg->type->is_signed, - FILTER_OTHER); - if (ret) - return ret; - } - return 0; + return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp); } static int kretprobe_event_define_fields(struct trace_event_call *event_call) { - int ret, i; + int ret; struct kretprobe_trace_entry_head field; struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data; DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); - /* Set argument names as fields */ - for (i = 0; i < tk->tp.nr_args; i++) { - struct probe_arg *parg = &tk->tp.args[i]; - ret = trace_define_field(event_call, parg->type->fmttype, - parg->name, - sizeof(field) + parg->offset, - parg->type->size, - parg->type->is_signed, - FILTER_OTHER); - if (ret) - return ret; - } - return 0; + return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp); } #ifdef CONFIG_PERF_EVENTS diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index e2c184eaa7db..21af28ffba3a 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -668,3 +668,24 @@ int set_print_fmt(struct trace_probe *tp, bool is_return) return 0; } + +int traceprobe_define_arg_fields(struct trace_event_call *event_call, + size_t offset, struct trace_probe *tp) +{ + int ret, i; + + /* Set argument names as fields */ + for (i = 0; i < tp->nr_args; i++) { + struct probe_arg *parg = &tp->args[i]; + + ret = trace_define_field(event_call, parg->type->fmttype, + parg->name, + offset + parg->offset, + parg->type->size, + parg->type->is_signed, + FILTER_OTHER); + if (ret) + return ret; + } + return 0; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 8254a061ac35..a1df7763b797 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -429,3 +429,5 @@ create_local_trace_uprobe(char *name, unsigned long offs, unsigned long ref_ctr_offset, bool is_return); extern void destroy_local_trace_uprobe(struct trace_event_call *event_call); #endif +extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, + size_t offset, struct trace_probe *tp); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c55753e1079e..28a8f69cec89 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1018,7 +1018,7 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file) static int uprobe_event_define_fields(struct trace_event_call *event_call) { - int ret, i, size; + int ret, size; struct uprobe_trace_entry_head field; struct trace_uprobe *tu = event_call->data; @@ -1030,19 +1030,8 @@ static int uprobe_event_define_fields(struct trace_event_call *event_call) DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0); size = SIZEOF_TRACE_ENTRY(false); } - /* Set argument names as fields */ - for (i = 0; i < tu->tp.nr_args; i++) { - struct probe_arg *parg = &tu->tp.args[i]; - - ret = trace_define_field(event_call, parg->type->fmttype, - parg->name, size + parg->offset, - parg->type->size, parg->type->is_signed, - FILTER_OTHER); - if (ret) - return ret; - } - return 0; + return traceprobe_define_arg_fields(event_call, size, &tu->tp); } #ifdef CONFIG_PERF_EVENTS -- cgit v1.2.3 From 7bfbc63eda08b8158c040d6882c807f62b0750bb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:17:34 +0900 Subject: tracing: probeevent: Remove NOKPROBE_SYMBOL from print functions Remove unneeded NOKPROBE_SYMBOL from print functions since the print functions are only used when printing out the trace data, and not from kprobe handler. Link: http://lkml.kernel.org/r/152465865422.26224.10111548170594014954.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 21af28ffba3a..5f3b5b3fd2cd 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -31,8 +31,7 @@ int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, void *data, void *ent)\ trace_seq_printf(s, fmt, *(type *)data); \ return !trace_seq_has_overflowed(s); \ } \ -const char PRINT_TYPE_FMT_NAME(tname)[] = fmt; \ -NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(tname)); +const char PRINT_TYPE_FMT_NAME(tname)[] = fmt; DEFINE_BASIC_PRINT_TYPE_FUNC(u8, u8, "%u") DEFINE_BASIC_PRINT_TYPE_FUNC(u16, u16, "%u") @@ -59,7 +58,6 @@ int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent) (const char *)get_loc_data(data, ent)); return !trace_seq_has_overflowed(s); } -NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string)); const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; -- cgit v1.2.3 From 533059281ee594f9fbb9e58042aaec77083ef251 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:18:03 +0900 Subject: tracing: probeevent: Introduce new argument fetching code Replace {k,u}probe event argument fetching framework with switch-case based. Currently that is implemented with structures, macros and chain of function-pointers, which is more complicated than necessary and may get a performance penalty by retpoline. This simplify that with an array of "fetch_insn" (opcode and oprands), and make process_fetch_insn() just interprets it. No function pointers are used. Link: http://lkml.kernel.org/r/152465868340.26224.2551120475197839464.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 291 +++++++++++++---------------- kernel/trace/trace_probe.c | 401 +++++++++++----------------------------- kernel/trace/trace_probe.h | 230 +++++------------------ kernel/trace/trace_probe_tmpl.h | 120 ++++++++++++ kernel/trace/trace_uprobe.c | 127 ++++++++----- 5 files changed, 491 insertions(+), 678 deletions(-) create mode 100644 kernel/trace/trace_probe_tmpl.h (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 1356927e32d0..c024cc40d509 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -14,6 +14,7 @@ #include "trace_kprobe_selftest.h" #include "trace_probe.h" +#include "trace_probe_tmpl.h" #define KPROBE_EVENT_SYSTEM "kprobes" #define KRETPROBE_MAXACTIVE_MAX 4096 @@ -120,160 +121,6 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); static int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs); -/* Memory fetching by symbol */ -struct symbol_cache { - char *symbol; - long offset; - unsigned long addr; -}; - -unsigned long update_symbol_cache(struct symbol_cache *sc) -{ - sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); - - if (sc->addr) - sc->addr += sc->offset; - - return sc->addr; -} - -void free_symbol_cache(struct symbol_cache *sc) -{ - kfree(sc->symbol); - kfree(sc); -} - -struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) -{ - struct symbol_cache *sc; - - if (!sym || strlen(sym) == 0) - return NULL; - - sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); - if (!sc) - return NULL; - - sc->symbol = kstrdup(sym, GFP_KERNEL); - if (!sc->symbol) { - kfree(sc); - return NULL; - } - sc->offset = offset; - update_symbol_cache(sc); - - return sc; -} - -/* - * Kprobes-specific fetch functions - */ -#define DEFINE_FETCH_stack(type) \ -static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \ - void *offset, void *dest) \ -{ \ - *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ - (unsigned int)((unsigned long)offset)); \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type)); - -DEFINE_BASIC_FETCH_FUNCS(stack) -/* No string on the stack entry */ -#define fetch_stack_string NULL -#define fetch_stack_string_size NULL - -#define DEFINE_FETCH_memory(type) \ -static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \ - void *addr, void *dest) \ -{ \ - type retval; \ - if (probe_kernel_address(addr, retval)) \ - *(type *)dest = 0; \ - else \ - *(type *)dest = retval; \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type)); - -DEFINE_BASIC_FETCH_FUNCS(memory) -/* - * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max - * length and relative data location. - */ -static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, - void *addr, void *dest) -{ - int maxlen = get_rloc_len(*(u32 *)dest); - u8 *dst = get_rloc_data(dest); - long ret; - - if (!maxlen) - return; - - /* - * Try to get string again, since the string can be changed while - * probing. - */ - ret = strncpy_from_unsafe(dst, addr, maxlen); - - if (ret < 0) { /* Failed to fetch string */ - dst[0] = '\0'; - *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); - } else { - *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest)); - } -} -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string)); - -/* Return the length of string -- including null terminal byte */ -static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, - void *addr, void *dest) -{ - mm_segment_t old_fs; - int ret, len = 0; - u8 c; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - pagefault_disable(); - - do { - ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); - len++; - } while (c && ret == 0 && len < MAX_STRING_SIZE); - - pagefault_enable(); - set_fs(old_fs); - - if (ret < 0) /* Failed to check the length */ - *(u32 *)dest = 0; - else - *(u32 *)dest = len; -} -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size)); - -#define DEFINE_FETCH_symbol(type) \ -void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\ -{ \ - struct symbol_cache *sc = data; \ - if (sc->addr) \ - fetch_memory_##type(regs, (void *)sc->addr, dest); \ - else \ - *(type *)dest = 0; \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type)); - -DEFINE_BASIC_FETCH_FUNCS(symbol) -DEFINE_FETCH_symbol(string) -DEFINE_FETCH_symbol(string_size) - -/* kprobes don't support file_offset fetch methods */ -#define fetch_file_offset_u8 NULL -#define fetch_file_offset_u16 NULL -#define fetch_file_offset_u32 NULL -#define fetch_file_offset_u64 NULL -#define fetch_file_offset_string NULL -#define fetch_file_offset_string_size NULL - /* Fetch type information table */ static const struct fetch_type kprobes_fetch_type_table[] = { /* Special types */ @@ -529,7 +376,7 @@ static bool within_notrace_func(struct trace_kprobe *tk) /* Internal register function - just handle k*probes and flags */ static int __register_trace_kprobe(struct trace_kprobe *tk) { - int i, ret; + int ret; if (trace_probe_is_registered(&tk->tp)) return -EINVAL; @@ -540,9 +387,6 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) return -EINVAL; } - for (i = 0; i < tk->tp.nr_args; i++) - traceprobe_update_arg(&tk->tp.args[i]); - /* Set/clear disabled flag according to tp->flag */ if (trace_probe_is_enabled(&tk->tp)) tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; @@ -876,8 +720,8 @@ static int create_trace_kprobe(int argc, char **argv) /* Parse fetch argument */ ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg, - is_return, true, - kprobes_fetch_type_table); + is_return, true, + kprobes_fetch_type_table); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; @@ -1031,6 +875,133 @@ static const struct file_operations kprobe_profile_ops = { .release = seq_release, }; +/* Kprobe specific fetch functions */ + +/* Return the length of string -- including null terminal byte */ +static nokprobe_inline void +fetch_store_strlen(unsigned long addr, void *dest) +{ + mm_segment_t old_fs; + int ret, len = 0; + u8 c; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + pagefault_disable(); + + do { + ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); + len++; + } while (c && ret == 0 && len < MAX_STRING_SIZE); + + pagefault_enable(); + set_fs(old_fs); + + if (ret < 0) /* Failed to check the length */ + *(u32 *)dest = 0; + else + *(u32 *)dest = len; +} + +/* + * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max + * length and relative data location. + */ +static nokprobe_inline void +fetch_store_string(unsigned long addr, void *dest) +{ + int maxlen = get_rloc_len(*(u32 *)dest); + u8 *dst = get_rloc_data(dest); + long ret; + + if (!maxlen) + return; + + /* + * Try to get string again, since the string can be changed while + * probing. + */ + ret = strncpy_from_unsafe(dst, (void *)addr, maxlen); + + if (ret < 0) { /* Failed to fetch string */ + dst[0] = '\0'; + *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); + } else { + *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest)); + } +} + +/* Note that we don't verify it, since the code does not come from user space */ +static int +process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, + bool pre) +{ + unsigned long val; + int ret; + + /* 1st stage: get value from context */ + switch (code->op) { + case FETCH_OP_REG: + val = regs_get_register(regs, code->param); + break; + case FETCH_OP_STACK: + val = regs_get_kernel_stack_nth(regs, code->param); + break; + case FETCH_OP_STACKP: + val = kernel_stack_pointer(regs); + break; + case FETCH_OP_RETVAL: + val = regs_return_value(regs); + break; + case FETCH_OP_IMM: + val = code->immediate; + break; + case FETCH_OP_COMM: + val = (unsigned long)current->comm; + break; + default: + return -EILSEQ; + } + code++; + + /* 2nd stage: dereference memory if needed */ + while (code->op == FETCH_OP_DEREF) { + ret = probe_kernel_read(&val, (void *)val + code->offset, + sizeof(val)); + if (ret) + return ret; + code++; + } + + /* 3rd stage: store value to buffer */ + switch (code->op) { + case FETCH_OP_ST_RAW: + fetch_store_raw(val, code, dest); + break; + case FETCH_OP_ST_MEM: + probe_kernel_read(dest, (void *)val + code->offset, code->size); + break; + case FETCH_OP_ST_STRING: + if (pre) + fetch_store_strlen(val + code->offset, dest); + else + fetch_store_string(val + code->offset, dest); + break; + default: + return -EILSEQ; + } + code++; + + /* 4th stage: modify stored value if needed */ + if (code->op == FETCH_OP_MOD_BF) { + fetch_apply_bitfield(code, dest); + code++; + } + + return code->op == FETCH_OP_END ? 0 : -EILSEQ; +} +NOKPROBE_SYMBOL(process_fetch_insn) + /* Kprobe handler */ static nokprobe_inline void __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 5f3b5b3fd2cd..c59c69cb2f2e 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -61,174 +61,6 @@ int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent) const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; -#define CHECK_FETCH_FUNCS(method, fn) \ - (((FETCH_FUNC_NAME(method, u8) == fn) || \ - (FETCH_FUNC_NAME(method, u16) == fn) || \ - (FETCH_FUNC_NAME(method, u32) == fn) || \ - (FETCH_FUNC_NAME(method, u64) == fn) || \ - (FETCH_FUNC_NAME(method, string) == fn) || \ - (FETCH_FUNC_NAME(method, string_size) == fn)) \ - && (fn != NULL)) - -/* Data fetch function templates */ -#define DEFINE_FETCH_reg(type) \ -void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest) \ -{ \ - *(type *)dest = (type)regs_get_register(regs, \ - (unsigned int)((unsigned long)offset)); \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type)); -DEFINE_BASIC_FETCH_FUNCS(reg) -/* No string on the register */ -#define fetch_reg_string NULL -#define fetch_reg_string_size NULL - -#define DEFINE_FETCH_retval(type) \ -void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \ - void *dummy, void *dest) \ -{ \ - *(type *)dest = (type)regs_return_value(regs); \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type)); -DEFINE_BASIC_FETCH_FUNCS(retval) -/* No string on the retval */ -#define fetch_retval_string NULL -#define fetch_retval_string_size NULL - -/* Dereference memory access function */ -struct deref_fetch_param { - struct fetch_param orig; - long offset; - fetch_func_t fetch; - fetch_func_t fetch_size; -}; - -#define DEFINE_FETCH_deref(type) \ -void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \ - void *data, void *dest) \ -{ \ - struct deref_fetch_param *dprm = data; \ - unsigned long addr; \ - call_fetch(&dprm->orig, regs, &addr); \ - if (addr) { \ - addr += dprm->offset; \ - dprm->fetch(regs, (void *)addr, dest); \ - } else \ - *(type *)dest = 0; \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type)); -DEFINE_BASIC_FETCH_FUNCS(deref) -DEFINE_FETCH_deref(string) - -void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs, - void *data, void *dest) -{ - struct deref_fetch_param *dprm = data; - unsigned long addr; - - call_fetch(&dprm->orig, regs, &addr); - if (addr && dprm->fetch_size) { - addr += dprm->offset; - dprm->fetch_size(regs, (void *)addr, dest); - } else - *(string_size *)dest = 0; -} -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size)); - -static void update_deref_fetch_param(struct deref_fetch_param *data) -{ - if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) - update_deref_fetch_param(data->orig.data); - else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) - update_symbol_cache(data->orig.data); -} -NOKPROBE_SYMBOL(update_deref_fetch_param); - -static void free_deref_fetch_param(struct deref_fetch_param *data) -{ - if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) - free_deref_fetch_param(data->orig.data); - else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) - free_symbol_cache(data->orig.data); - kfree(data); -} -NOKPROBE_SYMBOL(free_deref_fetch_param); - -/* Bitfield fetch function */ -struct bitfield_fetch_param { - struct fetch_param orig; - unsigned char hi_shift; - unsigned char low_shift; -}; - -#define DEFINE_FETCH_bitfield(type) \ -void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \ - void *data, void *dest) \ -{ \ - struct bitfield_fetch_param *bprm = data; \ - type buf = 0; \ - call_fetch(&bprm->orig, regs, &buf); \ - if (buf) { \ - buf <<= bprm->hi_shift; \ - buf >>= bprm->low_shift; \ - } \ - *(type *)dest = buf; \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type)); -DEFINE_BASIC_FETCH_FUNCS(bitfield) -#define fetch_bitfield_string NULL -#define fetch_bitfield_string_size NULL - -static void -update_bitfield_fetch_param(struct bitfield_fetch_param *data) -{ - /* - * Don't check the bitfield itself, because this must be the - * last fetch function. - */ - if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) - update_deref_fetch_param(data->orig.data); - else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) - update_symbol_cache(data->orig.data); -} - -static void -free_bitfield_fetch_param(struct bitfield_fetch_param *data) -{ - /* - * Don't check the bitfield itself, because this must be the - * last fetch function. - */ - if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) - free_deref_fetch_param(data->orig.data); - else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) - free_symbol_cache(data->orig.data); - - kfree(data); -} - -void FETCH_FUNC_NAME(comm, string)(struct pt_regs *regs, - void *data, void *dest) -{ - int maxlen = get_rloc_len(*(u32 *)dest); - u8 *dst = get_rloc_data(dest); - long ret; - - if (!maxlen) - return; - - ret = strlcpy(dst, current->comm, maxlen); - *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest)); -} -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string)); - -void FETCH_FUNC_NAME(comm, string_size)(struct pt_regs *regs, - void *data, void *dest) -{ - *(u32 *)dest = strlen(current->comm) + 1; -} -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string_size)); - static const struct fetch_type *find_fetch_type(const char *type, const struct fetch_type *ftbl) { @@ -272,37 +104,6 @@ fail: return NULL; } -/* Special function : only accept unsigned long */ -static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest) -{ - *(unsigned long *)dest = kernel_stack_pointer(regs); -} -NOKPROBE_SYMBOL(fetch_kernel_stack_address); - -static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest) -{ - *(unsigned long *)dest = user_stack_pointer(regs); -} -NOKPROBE_SYMBOL(fetch_user_stack_address); - -static fetch_func_t get_fetch_size_function(const struct fetch_type *type, - fetch_func_t orig_fn, - const struct fetch_type *ftbl) -{ - int i; - - if (type != &ftbl[FETCH_TYPE_STRING]) - return NULL; /* Only string type needs size function */ - - for (i = 0; i < FETCH_MTD_END; i++) - if (type->fetch[i] == orig_fn) - return ftbl[FETCH_TYPE_STRSIZE].fetch[i]; - - WARN_ON(1); /* This should not happen */ - - return NULL; -} - /* Split symbol and offset. */ int traceprobe_split_symbol_offset(char *symbol, long *offset) { @@ -327,7 +128,7 @@ int traceprobe_split_symbol_offset(char *symbol, long *offset) #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) static int parse_probe_vars(char *arg, const struct fetch_type *t, - struct fetch_param *f, bool is_return, + struct fetch_insn *code, bool is_return, bool is_kprobe) { int ret = 0; @@ -335,33 +136,24 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, if (strcmp(arg, "retval") == 0) { if (is_return) - f->fn = t->fetch[FETCH_MTD_retval]; + code->op = FETCH_OP_RETVAL; else ret = -EINVAL; } else if (strncmp(arg, "stack", 5) == 0) { if (arg[5] == '\0') { - if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR)) - return -EINVAL; - - if (is_kprobe) - f->fn = fetch_kernel_stack_address; - else - f->fn = fetch_user_stack_address; + code->op = FETCH_OP_STACKP; } else if (isdigit(arg[5])) { ret = kstrtoul(arg + 5, 10, ¶m); if (ret || (is_kprobe && param > PARAM_MAX_STACK)) ret = -EINVAL; else { - f->fn = t->fetch[FETCH_MTD_stack]; - f->data = (void *)param; + code->op = FETCH_OP_STACK; + code->param = (unsigned int)param; } } else ret = -EINVAL; } else if (strcmp(arg, "comm") == 0) { - if (strcmp(t->name, "string") != 0 && - strcmp(t->name, "string_size") != 0) - return -EINVAL; - f->fn = t->fetch[FETCH_MTD_comm]; + code->op = FETCH_OP_COMM; } else ret = -EINVAL; @@ -369,10 +161,13 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, } /* Recursive argument parser */ -static int parse_probe_arg(char *arg, const struct fetch_type *t, - struct fetch_param *f, bool is_return, bool is_kprobe, - const struct fetch_type *ftbl) +static int +parse_probe_arg(char *arg, const struct fetch_type *type, + struct fetch_insn **pcode, struct fetch_insn *end, + bool is_return, bool is_kprobe, + const struct fetch_type *ftbl) { + struct fetch_insn *code = *pcode; unsigned long param; long offset; char *tmp; @@ -380,14 +175,15 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, switch (arg[0]) { case '$': - ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe); + ret = parse_probe_vars(arg + 1, type, code, + is_return, is_kprobe); break; case '%': /* named register */ ret = regs_query_register_offset(arg + 1); if (ret >= 0) { - f->fn = t->fetch[FETCH_MTD_reg]; - f->data = (void *)(unsigned long)ret; + code->op = FETCH_OP_REG; + code->param = (unsigned int)ret; ret = 0; } break; @@ -397,9 +193,9 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, ret = kstrtoul(arg + 1, 0, ¶m); if (ret) break; - - f->fn = t->fetch[FETCH_MTD_memory]; - f->data = (void *)param; + /* load address */ + code->op = FETCH_OP_IMM; + code->immediate = param; } else if (arg[1] == '+') { /* kprobes don't support file offsets */ if (is_kprobe) @@ -409,8 +205,8 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, if (ret) break; - f->fn = t->fetch[FETCH_MTD_file_offset]; - f->data = (void *)offset; + code->op = FETCH_OP_FOFFS; + code->immediate = (unsigned long)offset; // imm64? } else { /* uprobes don't support symbols */ if (!is_kprobe) @@ -420,10 +216,19 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, if (ret) break; - f->data = alloc_symbol_cache(arg + 1, offset); - if (f->data) - f->fn = t->fetch[FETCH_MTD_symbol]; + code->op = FETCH_OP_IMM; + code->immediate = + (unsigned long)kallsyms_lookup_name(arg + 1); + if (!code->immediate) + return -ENOENT; + code->immediate += offset; } + /* These are fetching from memory */ + if (++code == end) + return -E2BIG; + *pcode = code; + code->op = FETCH_OP_DEREF; + code->offset = offset; break; case '+': /* deref memory */ @@ -431,11 +236,10 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, case '-': tmp = strchr(arg, '('); if (!tmp) - break; + return -EINVAL; *tmp = '\0'; ret = kstrtol(arg, 0, &offset); - if (ret) break; @@ -443,36 +247,29 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, tmp = strrchr(arg, ')'); if (tmp) { - struct deref_fetch_param *dprm; - const struct fetch_type *t2; + const struct fetch_type *t2; t2 = find_fetch_type(NULL, ftbl); *tmp = '\0'; - dprm = kzalloc(sizeof(struct deref_fetch_param), GFP_KERNEL); - - if (!dprm) - return -ENOMEM; - - dprm->offset = offset; - dprm->fetch = t->fetch[FETCH_MTD_memory]; - dprm->fetch_size = get_fetch_size_function(t, - dprm->fetch, ftbl); - ret = parse_probe_arg(arg, t2, &dprm->orig, is_return, - is_kprobe, ftbl); + ret = parse_probe_arg(arg, t2, &code, end, is_return, + is_kprobe, ftbl); if (ret) - kfree(dprm); - else { - f->fn = t->fetch[FETCH_MTD_deref]; - f->data = (void *)dprm; - } + break; + if (code->op == FETCH_OP_COMM) + return -EINVAL; + if (++code == end) + return -E2BIG; + *pcode = code; + + code->op = FETCH_OP_DEREF; + code->offset = offset; } break; } - if (!ret && !f->fn) { /* Parsed, but do not find fetch method */ - pr_info("%s type has no corresponding fetch method.\n", t->name); + if (!ret && code->op == FETCH_OP_NOP) { + /* Parsed, but do not find fetch method */ ret = -EINVAL; } - return ret; } @@ -481,22 +278,15 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, /* Bitfield type needs to be parsed into a fetch function */ static int __parse_bitfield_probe_arg(const char *bf, const struct fetch_type *t, - struct fetch_param *f) + struct fetch_insn **pcode) { - struct bitfield_fetch_param *bprm; + struct fetch_insn *code = *pcode; unsigned long bw, bo; char *tail; if (*bf != 'b') return 0; - bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); - if (!bprm) - return -ENOMEM; - - bprm->orig = *f; - f->fn = t->fetch[FETCH_MTD_bitfield]; - f->data = (void *)bprm; bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */ if (bw == 0 || *tail != '@') @@ -507,9 +297,15 @@ static int __parse_bitfield_probe_arg(const char *bf, if (tail == bf || *tail != '/') return -EINVAL; + code++; + if (code->op != FETCH_OP_NOP) + return -E2BIG; + *pcode = code; - bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo); - bprm->low_shift = bprm->hi_shift + bo; + code->op = FETCH_OP_MOD_BF; + code->lshift = BYTES_TO_BITS(t->size) - (bw + bo); + code->rshift = BYTES_TO_BITS(t->size) - bw; + code->basesize = t->size; return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0; } @@ -519,6 +315,7 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, struct probe_arg *parg, bool is_return, bool is_kprobe, const struct fetch_type *ftbl) { + struct fetch_insn *code, *tmp = NULL; const char *t; int ret; @@ -549,18 +346,60 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, } parg->offset = *size; *size += parg->type->size; - ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, - is_kprobe, ftbl); - - if (ret >= 0 && t != NULL) - ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); - if (ret >= 0) { - parg->fetch_size.fn = get_fetch_size_function(parg->type, - parg->fetch.fn, - ftbl); - parg->fetch_size.data = parg->fetch.data; + code = tmp = kzalloc(sizeof(*code) * FETCH_INSN_MAX, GFP_KERNEL); + if (!code) + return -ENOMEM; + code[FETCH_INSN_MAX - 1].op = FETCH_OP_END; + + ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1], + is_return, is_kprobe, ftbl); + if (ret) + goto fail; + + /* Store operation */ + if (!strcmp(parg->type->name, "string")) { + if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_IMM && + code->op != FETCH_OP_COMM) { + pr_info("string only accepts memory or address.\n"); + ret = -EINVAL; + goto fail; + } + /* Since IMM or COMM must be the 1st insn, this is safe */ + if (code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) + code++; + code->op = FETCH_OP_ST_STRING; /* In DEREF case, replace it */ + parg->dynamic = true; + } else if (code->op == FETCH_OP_DEREF) { + code->op = FETCH_OP_ST_MEM; + code->size = parg->type->size; + } else { + code++; + if (code->op != FETCH_OP_NOP) { + ret = -E2BIG; + goto fail; + } + code->op = FETCH_OP_ST_RAW; + code->size = parg->type->size; + } + /* Modify operation */ + if (t != NULL) { + ret = __parse_bitfield_probe_arg(t, parg->type, &code); + if (ret) + goto fail; } + code++; + code->op = FETCH_OP_END; + + /* Shrink down the code buffer */ + parg->code = kzalloc(sizeof(*code) * (code - tmp + 1), GFP_KERNEL); + if (!parg->code) + ret = -ENOMEM; + else + memcpy(parg->code, tmp, sizeof(*code) * (code - tmp + 1)); + +fail: + kfree(tmp); return ret; } @@ -582,25 +421,9 @@ int traceprobe_conflict_field_name(const char *name, return 0; } -void traceprobe_update_arg(struct probe_arg *arg) -{ - if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) - update_bitfield_fetch_param(arg->fetch.data); - else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) - update_deref_fetch_param(arg->fetch.data); - else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) - update_symbol_cache(arg->fetch.data); -} - void traceprobe_free_probe_arg(struct probe_arg *arg) { - if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) - free_bitfield_fetch_param(arg->fetch.data); - else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) - free_deref_fetch_param(arg->fetch.data); - else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) - free_symbol_cache(arg->fetch.data); - + kfree(arg->code); kfree(arg->name); kfree(arg->comm); } diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index a1df7763b797..42c724a7ad11 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -79,25 +79,50 @@ static nokprobe_inline void *get_loc_data(u32 *dl, void *ent) return (u8 *)ent + get_rloc_offs(*dl); } -/* Data fetch function type */ -typedef void (*fetch_func_t)(struct pt_regs *, void *, void *); /* Printing function type */ typedef int (*print_type_func_t)(struct trace_seq *, void *, void *); -/* Fetch types */ -enum { - FETCH_MTD_reg = 0, - FETCH_MTD_stack, - FETCH_MTD_retval, - FETCH_MTD_comm, - FETCH_MTD_memory, - FETCH_MTD_symbol, - FETCH_MTD_deref, - FETCH_MTD_bitfield, - FETCH_MTD_file_offset, - FETCH_MTD_END, +enum fetch_op { + FETCH_OP_NOP = 0, + // Stage 1 (load) ops + FETCH_OP_REG, /* Register : .param = offset */ + FETCH_OP_STACK, /* Stack : .param = index */ + FETCH_OP_STACKP, /* Stack pointer */ + FETCH_OP_RETVAL, /* Return value */ + FETCH_OP_IMM, /* Immediate : .immediate */ + FETCH_OP_COMM, /* Current comm */ + FETCH_OP_FOFFS, /* File offset: .immediate */ + // Stage 2 (dereference) op + FETCH_OP_DEREF, /* Dereference: .offset */ + // Stage 3 (store) ops + FETCH_OP_ST_RAW, /* Raw: .size */ + FETCH_OP_ST_MEM, /* Mem: .offset, .size */ + FETCH_OP_ST_STRING, /* String: .offset, .size */ + // Stage 4 (modify) op + FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */ + FETCH_OP_END, }; +struct fetch_insn { + enum fetch_op op; + union { + unsigned int param; + struct { + unsigned int size; + int offset; + }; + struct { + unsigned char basesize; + unsigned char lshift; + unsigned char rshift; + }; + unsigned long immediate; + }; +}; + +/* fetch + deref*N + store + mod + end <= 16, this allows N=12, enough */ +#define FETCH_INSN_MAX 16 + /* Fetch type information table */ struct fetch_type { const char *name; /* Name of type */ @@ -106,13 +131,6 @@ struct fetch_type { print_type_func_t print; /* Print functions */ const char *fmt; /* Fromat string */ const char *fmttype; /* Name in format file */ - /* Fetch functions */ - fetch_func_t fetch[FETCH_MTD_END]; -}; - -struct fetch_param { - fetch_func_t fn; - void *data; }; /* For defining macros, define string/string_size types */ @@ -142,66 +160,12 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(x64); DECLARE_BASIC_PRINT_TYPE_FUNC(string); -#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type - -/* Declare macro for basic types */ -#define DECLARE_FETCH_FUNC(method, type) \ -extern void FETCH_FUNC_NAME(method, type)(struct pt_regs *regs, \ - void *data, void *dest) - -#define DECLARE_BASIC_FETCH_FUNCS(method) \ -DECLARE_FETCH_FUNC(method, u8); \ -DECLARE_FETCH_FUNC(method, u16); \ -DECLARE_FETCH_FUNC(method, u32); \ -DECLARE_FETCH_FUNC(method, u64) - -DECLARE_BASIC_FETCH_FUNCS(reg); -#define fetch_reg_string NULL -#define fetch_reg_string_size NULL - -DECLARE_BASIC_FETCH_FUNCS(retval); -#define fetch_retval_string NULL -#define fetch_retval_string_size NULL - -DECLARE_BASIC_FETCH_FUNCS(symbol); -DECLARE_FETCH_FUNC(symbol, string); -DECLARE_FETCH_FUNC(symbol, string_size); - -DECLARE_BASIC_FETCH_FUNCS(deref); -DECLARE_FETCH_FUNC(deref, string); -DECLARE_FETCH_FUNC(deref, string_size); - -DECLARE_BASIC_FETCH_FUNCS(bitfield); -#define fetch_bitfield_string NULL -#define fetch_bitfield_string_size NULL - -/* comm only makes sense as a string */ -#define fetch_comm_u8 NULL -#define fetch_comm_u16 NULL -#define fetch_comm_u32 NULL -#define fetch_comm_u64 NULL -DECLARE_FETCH_FUNC(comm, string); -DECLARE_FETCH_FUNC(comm, string_size); - -/* - * Define macro for basic types - we don't need to define s* types, because - * we have to care only about bitwidth at recording time. - */ -#define DEFINE_BASIC_FETCH_FUNCS(method) \ -DEFINE_FETCH_##method(u8) \ -DEFINE_FETCH_##method(u16) \ -DEFINE_FETCH_##method(u32) \ -DEFINE_FETCH_##method(u64) - /* Default (unsigned long) fetch type */ #define __DEFAULT_FETCH_TYPE(t) x##t #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) #define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) #define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) -#define ASSIGN_FETCH_FUNC(method, type) \ - [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type) - #define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \ {.name = _name, \ .size = _size, \ @@ -209,17 +173,6 @@ DEFINE_FETCH_##method(u64) .print = PRINT_TYPE_FUNC_NAME(ptype), \ .fmt = PRINT_TYPE_FMT_NAME(ptype), \ .fmttype = _fmttype, \ - .fetch = { \ -ASSIGN_FETCH_FUNC(reg, ftype), \ -ASSIGN_FETCH_FUNC(stack, ftype), \ -ASSIGN_FETCH_FUNC(retval, ftype), \ -ASSIGN_FETCH_FUNC(comm, ftype), \ -ASSIGN_FETCH_FUNC(memory, ftype), \ -ASSIGN_FETCH_FUNC(symbol, ftype), \ -ASSIGN_FETCH_FUNC(deref, ftype), \ -ASSIGN_FETCH_FUNC(bitfield, ftype), \ -ASSIGN_FETCH_FUNC(file_offset, ftype), \ - } \ } #define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \ @@ -231,42 +184,13 @@ ASSIGN_FETCH_FUNC(file_offset, ftype), \ #define ASSIGN_FETCH_TYPE_END {} -#define FETCH_TYPE_STRING 0 -#define FETCH_TYPE_STRSIZE 1 +#define FETCH_TYPE_STRING 0 +#define FETCH_TYPE_STRSIZE 1 #ifdef CONFIG_KPROBE_EVENTS -struct symbol_cache; -unsigned long update_symbol_cache(struct symbol_cache *sc); -void free_symbol_cache(struct symbol_cache *sc); -struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); bool trace_kprobe_on_func_entry(struct trace_event_call *call); bool trace_kprobe_error_injectable(struct trace_event_call *call); #else -/* uprobes do not support symbol fetch methods */ -#define fetch_symbol_u8 NULL -#define fetch_symbol_u16 NULL -#define fetch_symbol_u32 NULL -#define fetch_symbol_u64 NULL -#define fetch_symbol_string NULL -#define fetch_symbol_string_size NULL - -struct symbol_cache { -}; -static inline unsigned long __used update_symbol_cache(struct symbol_cache *sc) -{ - return 0; -} - -static inline void __used free_symbol_cache(struct symbol_cache *sc) -{ -} - -static inline struct symbol_cache * __used -alloc_symbol_cache(const char *sym, long offset) -{ - return NULL; -} - static inline bool trace_kprobe_on_func_entry(struct trace_event_call *call) { return false; @@ -279,8 +203,8 @@ static inline bool trace_kprobe_error_injectable(struct trace_event_call *call) #endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { - struct fetch_param fetch; - struct fetch_param fetch_size; + struct fetch_insn *code; + bool dynamic;/* Dynamic array (string) is used */ unsigned int offset; /* Offset from argument entry */ const char *name; /* Name of this argument */ const char *comm; /* Command of this argument */ @@ -312,12 +236,6 @@ static inline bool trace_probe_is_registered(struct trace_probe *tp) return !!(tp->flags & TP_FLAG_REGISTERED); } -static nokprobe_inline void call_fetch(struct fetch_param *fprm, - struct pt_regs *regs, void *dest) -{ - return fprm->fn(regs, fprm->data, dest); -} - /* Check the name is good for event/group/fields */ static inline bool is_good_name(const char *name) { @@ -354,68 +272,6 @@ extern void traceprobe_free_probe_arg(struct probe_arg *arg); extern int traceprobe_split_symbol_offset(char *symbol, long *offset); -/* Sum up total data length for dynamic arraies (strings) */ -static nokprobe_inline int -__get_data_size(struct trace_probe *tp, struct pt_regs *regs) -{ - int i, ret = 0; - u32 len; - - for (i = 0; i < tp->nr_args; i++) - if (unlikely(tp->args[i].fetch_size.fn)) { - call_fetch(&tp->args[i].fetch_size, regs, &len); - ret += len; - } - - return ret; -} - -/* Store the value of each argument */ -static nokprobe_inline void -store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs, - u8 *data, int maxlen) -{ - int i; - u32 end = tp->size; - u32 *dl; /* Data (relative) location */ - - for (i = 0; i < tp->nr_args; i++) { - if (unlikely(tp->args[i].fetch_size.fn)) { - /* - * First, we set the relative location and - * maximum data length to *dl - */ - dl = (u32 *)(data + tp->args[i].offset); - *dl = make_data_rloc(maxlen, end - tp->args[i].offset); - /* Then try to fetch string or dynamic array data */ - call_fetch(&tp->args[i].fetch, regs, dl); - /* Reduce maximum length */ - end += get_rloc_len(*dl); - maxlen -= get_rloc_len(*dl); - /* Trick here, convert data_rloc to data_loc */ - *dl = convert_rloc_to_loc(*dl, - ent_size + tp->args[i].offset); - } else - /* Just fetching data normally */ - call_fetch(&tp->args[i].fetch, regs, - data + tp->args[i].offset); - } -} - -static inline int -print_probe_args(struct trace_seq *s, struct probe_arg *args, int nr_args, - u8 *data, void *field) -{ - int i; - - for (i = 0; i < nr_args; i++) { - trace_seq_printf(s, " %s=", args[i].name); - if (!args[i].type->print(s, data + args[i].offset, field)) - return -ENOMEM; - } - return 0; -} - extern int set_print_fmt(struct trace_probe *tp, bool is_return); #ifdef CONFIG_PERF_EVENTS diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h new file mode 100644 index 000000000000..c8a5272abf01 --- /dev/null +++ b/kernel/trace/trace_probe_tmpl.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Traceprobe fetch helper inlines + */ + +static nokprobe_inline void +fetch_store_raw(unsigned long val, struct fetch_insn *code, void *buf) +{ + switch (code->size) { + case 1: + *(u8 *)buf = (u8)val; + break; + case 2: + *(u16 *)buf = (u16)val; + break; + case 4: + *(u32 *)buf = (u32)val; + break; + case 8: + //TBD: 32bit signed + *(u64 *)buf = (u64)val; + break; + default: + *(unsigned long *)buf = val; + } +} + +static nokprobe_inline void +fetch_apply_bitfield(struct fetch_insn *code, void *buf) +{ + switch (code->basesize) { + case 1: + *(u8 *)buf <<= code->lshift; + *(u8 *)buf >>= code->rshift; + break; + case 2: + *(u16 *)buf <<= code->lshift; + *(u16 *)buf >>= code->rshift; + break; + case 4: + *(u32 *)buf <<= code->lshift; + *(u32 *)buf >>= code->rshift; + break; + case 8: + *(u64 *)buf <<= code->lshift; + *(u64 *)buf >>= code->rshift; + break; + } +} + +/* Define this for each callsite */ +static int +process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, + void *dest, bool pre); + +/* Sum up total data length for dynamic arraies (strings) */ +static nokprobe_inline int +__get_data_size(struct trace_probe *tp, struct pt_regs *regs) +{ + struct probe_arg *arg; + int i, ret = 0; + u32 len; + + for (i = 0; i < tp->nr_args; i++) { + arg = tp->args + i; + if (unlikely(arg->dynamic)) { + process_fetch_insn(arg->code, regs, &len, true); + ret += len; + } + } + + return ret; +} + +/* Store the value of each argument */ +static nokprobe_inline void +store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs, + u8 *data, int maxlen) +{ + struct probe_arg *arg; + u32 end = tp->size; + u32 *dl; /* Data (relative) location */ + int i; + + for (i = 0; i < tp->nr_args; i++) { + arg = tp->args + i; + if (unlikely(arg->dynamic)) { + /* + * First, we set the relative location and + * maximum data length to *dl + */ + dl = (u32 *)(data + arg->offset); + *dl = make_data_rloc(maxlen, end - arg->offset); + /* Then try to fetch string or dynamic array data */ + process_fetch_insn(arg->code, regs, dl, false); + /* Reduce maximum length */ + end += get_rloc_len(*dl); + maxlen -= get_rloc_len(*dl); + /* Trick here, convert data_rloc to data_loc */ + *dl = convert_rloc_to_loc(*dl, ent_size + arg->offset); + } else + /* Just fetching data normally */ + process_fetch_insn(arg->code, regs, data + arg->offset, + false); + } +} + +static inline int +print_probe_args(struct trace_seq *s, struct probe_arg *args, int nr_args, + u8 *data, void *field) +{ + int i; + + for (i = 0; i < nr_args; i++) { + trace_seq_printf(s, " %s=", args[i].name); + if (!args[i].type->print(s, data + args[i].offset, field)) + return -ENOMEM; + } + return 0; +} diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 28a8f69cec89..e076f89ab33a 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -15,6 +15,7 @@ #include #include "trace_probe.h" +#include "trace_probe_tmpl.h" #define UPROBE_EVENT_SYSTEM "uprobes" @@ -99,37 +100,19 @@ static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n) /* * Uprobes-specific fetch functions */ -#define DEFINE_FETCH_stack(type) \ -static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \ - void *offset, void *dest) \ -{ \ - *(type *)dest = (type)get_user_stack_nth(regs, \ - ((unsigned long)offset)); \ -} -DEFINE_BASIC_FETCH_FUNCS(stack) -/* No string on the stack entry */ -#define fetch_stack_string NULL -#define fetch_stack_string_size NULL - -#define DEFINE_FETCH_memory(type) \ -static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \ - void *addr, void *dest) \ -{ \ - type retval; \ - void __user *vaddr = (void __force __user *) addr; \ - \ - if (copy_from_user(&retval, vaddr, sizeof(type))) \ - *(type *)dest = 0; \ - else \ - *(type *) dest = retval; \ +static nokprobe_inline int +probe_user_read(void *dest, void *src, size_t size) +{ + void __user *vaddr = (void __force __user *)src; + + return copy_from_user(dest, vaddr, size); } -DEFINE_BASIC_FETCH_FUNCS(memory) /* * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max * length and relative data location. */ -static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, - void *addr, void *dest) +static nokprobe_inline void +fetch_store_string(unsigned long addr, void *dest) { long ret; u32 rloc = *(u32 *)dest; @@ -152,8 +135,9 @@ static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, } } -static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, - void *addr, void *dest) +/* Return the length of string -- including null terminal byte */ +static nokprobe_inline void +fetch_store_strlen(unsigned long addr, void *dest) { int len; void __user *vaddr = (void __force __user *) addr; @@ -166,7 +150,7 @@ static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, *(u32 *)dest = len; } -static unsigned long translate_user_vaddr(void *file_offset) +static unsigned long translate_user_vaddr(unsigned long file_offset) { unsigned long base_addr; struct uprobe_dispatch_data *udd; @@ -174,21 +158,9 @@ static unsigned long translate_user_vaddr(void *file_offset) udd = (void *) current->utask->vaddr; base_addr = udd->bp_addr - udd->tu->offset; - return base_addr + (unsigned long)file_offset; + return base_addr + file_offset; } -#define DEFINE_FETCH_file_offset(type) \ -static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs, \ - void *offset, void *dest)\ -{ \ - void *vaddr = (void *)translate_user_vaddr(offset); \ - \ - FETCH_FUNC_NAME(memory, type)(regs, vaddr, dest); \ -} -DEFINE_BASIC_FETCH_FUNCS(file_offset) -DEFINE_FETCH_file_offset(string) -DEFINE_FETCH_file_offset(string_size) - /* Fetch type information table */ static const struct fetch_type uprobes_fetch_type_table[] = { /* Special types */ @@ -213,6 +185,77 @@ static const struct fetch_type uprobes_fetch_type_table[] = { ASSIGN_FETCH_TYPE_END }; +/* Note that we don't verify it, since the code does not come from user space */ +static int +process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, + bool pre) +{ + unsigned long val; + int ret; + + /* 1st stage: get value from context */ + switch (code->op) { + case FETCH_OP_REG: + val = regs_get_register(regs, code->param); + break; + case FETCH_OP_STACK: + val = get_user_stack_nth(regs, code->param); + break; + case FETCH_OP_STACKP: + val = user_stack_pointer(regs); + break; + case FETCH_OP_RETVAL: + val = regs_return_value(regs); + break; + case FETCH_OP_IMM: + val = code->immediate; + break; + case FETCH_OP_FOFFS: + val = translate_user_vaddr(code->immediate); + break; + default: + return -EILSEQ; + } + code++; + + /* 2nd stage: dereference memory if needed */ + while (code->op == FETCH_OP_DEREF) { + ret = probe_user_read(&val, (void *)val + code->offset, + sizeof(val)); + if (ret) + return ret; + code++; + } + + /* 3rd stage: store value to buffer */ + switch (code->op) { + case FETCH_OP_ST_RAW: + fetch_store_raw(val, code, dest); + break; + case FETCH_OP_ST_MEM: + probe_user_read(dest, (void *)val + code->offset, code->size); + break; + case FETCH_OP_ST_STRING: + if (pre) + fetch_store_strlen(val + code->offset, dest); + else + fetch_store_string(val + code->offset, dest); + break; + default: + return -EILSEQ; + } + code++; + + /* 4th stage: modify stored value if needed */ + if (code->op == FETCH_OP_MOD_BF) { + fetch_apply_bitfield(code, dest); + code++; + } + + return code->op == FETCH_OP_END ? 0 : -EILSEQ; +} +NOKPROBE_SYMBOL(process_fetch_insn) + static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter) { rwlock_init(&filter->rwlock); -- cgit v1.2.3 From f451bc89d8357f010304564728ba7c5d38a1d4d5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:18:32 +0900 Subject: tracing: probeevent: Unify fetch type tables Unify {k,u}probe_fetch_type_table to probe_fetch_type_table because the main difference of those type tables (fetcharg methods) are gone. Now we can consolidate it. Link: http://lkml.kernel.org/r/152465871274.26224.13999436317830479698.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 27 +---------------------- kernel/trace/trace_probe.c | 54 ++++++++++++++++++++++++++++++--------------- kernel/trace/trace_probe.h | 6 +---- kernel/trace/trace_uprobe.c | 27 +---------------------- 4 files changed, 39 insertions(+), 75 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c024cc40d509..dc1c638daf44 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -121,30 +121,6 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); static int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs); -/* Fetch type information table */ -static const struct fetch_type kprobes_fetch_type_table[] = { - /* Special types */ - [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, - sizeof(u32), 1, "__data_loc char[]"), - [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, - string_size, sizeof(u32), 0, "u32"), - /* Basic types */ - ASSIGN_FETCH_TYPE(u8, u8, 0), - ASSIGN_FETCH_TYPE(u16, u16, 0), - ASSIGN_FETCH_TYPE(u32, u32, 0), - ASSIGN_FETCH_TYPE(u64, u64, 0), - ASSIGN_FETCH_TYPE(s8, u8, 1), - ASSIGN_FETCH_TYPE(s16, u16, 1), - ASSIGN_FETCH_TYPE(s32, u32, 1), - ASSIGN_FETCH_TYPE(s64, u64, 1), - ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0), - ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0), - ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0), - ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0), - - ASSIGN_FETCH_TYPE_END -}; - /* * Allocate new trace_probe and initialize it (including kprobes). */ @@ -720,8 +696,7 @@ static int create_trace_kprobe(int argc, char **argv) /* Parse fetch argument */ ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg, - is_return, true, - kprobes_fetch_type_table); + is_return, true); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index c59c69cb2f2e..d06e67cca3e1 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -61,8 +61,29 @@ int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent) const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; -static const struct fetch_type *find_fetch_type(const char *type, - const struct fetch_type *ftbl) +/* Fetch type information table */ +static const struct fetch_type probe_fetch_types[] = { + /* Special types */ + __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1, + "__data_loc char[]"), + /* Basic types */ + ASSIGN_FETCH_TYPE(u8, u8, 0), + ASSIGN_FETCH_TYPE(u16, u16, 0), + ASSIGN_FETCH_TYPE(u32, u32, 0), + ASSIGN_FETCH_TYPE(u64, u64, 0), + ASSIGN_FETCH_TYPE(s8, u8, 1), + ASSIGN_FETCH_TYPE(s16, u16, 1), + ASSIGN_FETCH_TYPE(s32, u32, 1), + ASSIGN_FETCH_TYPE(s64, u64, 1), + ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0), + ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0), + ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0), + ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0), + + ASSIGN_FETCH_TYPE_END +}; + +static const struct fetch_type *find_fetch_type(const char *type) { int i; @@ -83,21 +104,21 @@ static const struct fetch_type *find_fetch_type(const char *type, switch (bs) { case 8: - return find_fetch_type("u8", ftbl); + return find_fetch_type("u8"); case 16: - return find_fetch_type("u16", ftbl); + return find_fetch_type("u16"); case 32: - return find_fetch_type("u32", ftbl); + return find_fetch_type("u32"); case 64: - return find_fetch_type("u64", ftbl); + return find_fetch_type("u64"); default: goto fail; } } - for (i = 0; ftbl[i].name; i++) { - if (strcmp(type, ftbl[i].name) == 0) - return &ftbl[i]; + for (i = 0; probe_fetch_types[i].name; i++) { + if (strcmp(type, probe_fetch_types[i].name) == 0) + return &probe_fetch_types[i]; } fail: @@ -164,8 +185,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, static int parse_probe_arg(char *arg, const struct fetch_type *type, struct fetch_insn **pcode, struct fetch_insn *end, - bool is_return, bool is_kprobe, - const struct fetch_type *ftbl) + bool is_return, bool is_kprobe) { struct fetch_insn *code = *pcode; unsigned long param; @@ -247,12 +267,11 @@ parse_probe_arg(char *arg, const struct fetch_type *type, tmp = strrchr(arg, ')'); if (tmp) { - const struct fetch_type *t2; + const struct fetch_type *t2 = find_fetch_type(NULL); - t2 = find_fetch_type(NULL, ftbl); *tmp = '\0'; ret = parse_probe_arg(arg, t2, &code, end, is_return, - is_kprobe, ftbl); + is_kprobe); if (ret) break; if (code->op == FETCH_OP_COMM) @@ -312,8 +331,7 @@ static int __parse_bitfield_probe_arg(const char *bf, /* String length checking wrapper */ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, - struct probe_arg *parg, bool is_return, bool is_kprobe, - const struct fetch_type *ftbl) + struct probe_arg *parg, bool is_return, bool is_kprobe) { struct fetch_insn *code, *tmp = NULL; const char *t; @@ -339,7 +357,7 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, */ if (!t && strcmp(arg, "$comm") == 0) t = "string"; - parg->type = find_fetch_type(t, ftbl); + parg->type = find_fetch_type(t); if (!parg->type) { pr_info("Unsupported type: %s\n", t); return -EINVAL; @@ -353,7 +371,7 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, code[FETCH_INSN_MAX - 1].op = FETCH_OP_END; ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1], - is_return, is_kprobe, ftbl); + is_return, is_kprobe); if (ret) goto fail; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 42c724a7ad11..5c262ed6347c 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -184,9 +184,6 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(string); #define ASSIGN_FETCH_TYPE_END {} -#define FETCH_TYPE_STRING 0 -#define FETCH_TYPE_STRSIZE 1 - #ifdef CONFIG_KPROBE_EVENTS bool trace_kprobe_on_func_entry(struct trace_event_call *call); bool trace_kprobe_error_injectable(struct trace_event_call *call); @@ -261,8 +258,7 @@ find_event_file_link(struct trace_probe *tp, struct trace_event_file *file) } extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size, - struct probe_arg *parg, bool is_return, bool is_kprobe, - const struct fetch_type *ftbl); + struct probe_arg *parg, bool is_return, bool is_kprobe); extern int traceprobe_conflict_field_name(const char *name, struct probe_arg *args, int narg); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e076f89ab33a..7772fec84c12 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -161,30 +161,6 @@ static unsigned long translate_user_vaddr(unsigned long file_offset) return base_addr + file_offset; } -/* Fetch type information table */ -static const struct fetch_type uprobes_fetch_type_table[] = { - /* Special types */ - [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, - sizeof(u32), 1, "__data_loc char[]"), - [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, - string_size, sizeof(u32), 0, "u32"), - /* Basic types */ - ASSIGN_FETCH_TYPE(u8, u8, 0), - ASSIGN_FETCH_TYPE(u16, u16, 0), - ASSIGN_FETCH_TYPE(u32, u32, 0), - ASSIGN_FETCH_TYPE(u64, u64, 0), - ASSIGN_FETCH_TYPE(s8, u8, 1), - ASSIGN_FETCH_TYPE(s16, u16, 1), - ASSIGN_FETCH_TYPE(s32, u32, 1), - ASSIGN_FETCH_TYPE(s64, u64, 1), - ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0), - ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0), - ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0), - ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0), - - ASSIGN_FETCH_TYPE_END -}; - /* Note that we don't verify it, since the code does not come from user space */ static int process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, @@ -621,8 +597,7 @@ static int create_trace_uprobe(int argc, char **argv) /* Parse fetch argument */ ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg, - is_return, false, - uprobes_fetch_type_table); + is_return, false); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; -- cgit v1.2.3 From 9178412ddf5a98feba0ad3986111c5ad10eb9e59 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:19:01 +0900 Subject: tracing: probeevent: Return consumed bytes of dynamic area Cleanup string fetching routine so that returns the consumed bytes of dynamic area and store the string information as data_loc format instead of data_rloc. This simplifies the fetcharg loop. Link: http://lkml.kernel.org/r/152465874163.26224.12125143907501289031.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 57 ++++++++++++++++++-------------------- kernel/trace/trace_probe.h | 26 ++++-------------- kernel/trace/trace_probe_tmpl.h | 54 +++++++++++++++++------------------- kernel/trace/trace_uprobe.c | 61 ++++++++++++++++++++--------------------- 4 files changed, 88 insertions(+), 110 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index dc1c638daf44..7e5064f8ab8f 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -853,8 +853,8 @@ static const struct file_operations kprobe_profile_ops = { /* Kprobe specific fetch functions */ /* Return the length of string -- including null terminal byte */ -static nokprobe_inline void -fetch_store_strlen(unsigned long addr, void *dest) +static nokprobe_inline int +fetch_store_strlen(unsigned long addr) { mm_segment_t old_fs; int ret, len = 0; @@ -872,47 +872,40 @@ fetch_store_strlen(unsigned long addr, void *dest) pagefault_enable(); set_fs(old_fs); - if (ret < 0) /* Failed to check the length */ - *(u32 *)dest = 0; - else - *(u32 *)dest = len; + return (ret < 0) ? ret : len; } /* * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max * length and relative data location. */ -static nokprobe_inline void -fetch_store_string(unsigned long addr, void *dest) +static nokprobe_inline int +fetch_store_string(unsigned long addr, void *dest, void *base) { - int maxlen = get_rloc_len(*(u32 *)dest); - u8 *dst = get_rloc_data(dest); + int maxlen = get_loc_len(*(u32 *)dest); + u8 *dst = get_loc_data(dest, base); long ret; - if (!maxlen) - return; - + if (unlikely(!maxlen)) + return -ENOMEM; /* * Try to get string again, since the string can be changed while * probing. */ ret = strncpy_from_unsafe(dst, (void *)addr, maxlen); - if (ret < 0) { /* Failed to fetch string */ - dst[0] = '\0'; - *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); - } else { - *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest)); - } + if (ret >= 0) + *(u32 *)dest = make_data_loc(ret, (void *)dst - base); + return ret; } /* Note that we don't verify it, since the code does not come from user space */ static int process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, - bool pre) + void *base) { unsigned long val; - int ret; + int ret = 0; /* 1st stage: get value from context */ switch (code->op) { @@ -949,6 +942,13 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, } /* 3rd stage: store value to buffer */ + if (unlikely(!dest)) { + if (code->op == FETCH_OP_ST_STRING) + return fetch_store_strlen(val + code->offset); + else + return -EILSEQ; + } + switch (code->op) { case FETCH_OP_ST_RAW: fetch_store_raw(val, code, dest); @@ -957,10 +957,7 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, probe_kernel_read(dest, (void *)val + code->offset, code->size); break; case FETCH_OP_ST_STRING: - if (pre) - fetch_store_strlen(val + code->offset, dest); - else - fetch_store_string(val + code->offset, dest); + ret = fetch_store_string(val + code->offset, dest, base); break; default: return -EILSEQ; @@ -973,7 +970,7 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, code++; } - return code->op == FETCH_OP_END ? 0 : -EILSEQ; + return code->op == FETCH_OP_END ? ret : -EILSEQ; } NOKPROBE_SYMBOL(process_fetch_insn) @@ -1008,7 +1005,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, entry = ring_buffer_event_data(event); entry->ip = (unsigned long)tk->rp.kp.addr; - store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); + store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); event_trigger_unlock_commit_regs(trace_file, buffer, event, entry, irq_flags, pc, regs); @@ -1057,7 +1054,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, entry = ring_buffer_event_data(event); entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; - store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); + store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); event_trigger_unlock_commit_regs(trace_file, buffer, event, entry, irq_flags, pc, regs); @@ -1203,7 +1200,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); - store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); + store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); return 0; @@ -1239,7 +1236,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; - store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); + store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); } diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 5c262ed6347c..b6bdd82fa485 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -54,29 +54,15 @@ #define TP_FLAG_PROFILE 2 #define TP_FLAG_REGISTERED 4 +/* data_loc: data location, compatible with u32 */ +#define make_data_loc(len, offs) \ + (((u32)(len) << 16) | ((u32)(offs) & 0xffff)) +#define get_loc_len(dl) ((u32)(dl) >> 16) +#define get_loc_offs(dl) ((u32)(dl) & 0xffff) -/* data_rloc: data relative location, compatible with u32 */ -#define make_data_rloc(len, roffs) \ - (((u32)(len) << 16) | ((u32)(roffs) & 0xffff)) -#define get_rloc_len(dl) ((u32)(dl) >> 16) -#define get_rloc_offs(dl) ((u32)(dl) & 0xffff) - -/* - * Convert data_rloc to data_loc: - * data_rloc stores the offset from data_rloc itself, but data_loc - * stores the offset from event entry. - */ -#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs)) - -static nokprobe_inline void *get_rloc_data(u32 *dl) -{ - return (u8 *)dl + get_rloc_offs(*dl); -} - -/* For data_loc conversion */ static nokprobe_inline void *get_loc_data(u32 *dl, void *ent) { - return (u8 *)ent + get_rloc_offs(*dl); + return (u8 *)ent + get_loc_offs(*dl); } /* Printing function type */ diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index c8a5272abf01..3b4aba6f84cc 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -48,24 +48,28 @@ fetch_apply_bitfield(struct fetch_insn *code, void *buf) } } -/* Define this for each callsite */ +/* + * This must be defined for each callsite. + * Return consumed dynamic data size (>= 0), or error (< 0). + * If dest is NULL, don't store result and return required dynamic data size. + */ static int process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, - void *dest, bool pre); + void *dest, void *base); /* Sum up total data length for dynamic arraies (strings) */ static nokprobe_inline int __get_data_size(struct trace_probe *tp, struct pt_regs *regs) { struct probe_arg *arg; - int i, ret = 0; - u32 len; + int i, len, ret = 0; for (i = 0; i < tp->nr_args; i++) { arg = tp->args + i; if (unlikely(arg->dynamic)) { - process_fetch_insn(arg->code, regs, &len, true); - ret += len; + len = process_fetch_insn(arg->code, regs, NULL, NULL); + if (len > 0) + ret += len; } } @@ -74,34 +78,26 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs) /* Store the value of each argument */ static nokprobe_inline void -store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs, - u8 *data, int maxlen) +store_trace_args(void *data, struct trace_probe *tp, struct pt_regs *regs, + int header_size, int maxlen) { struct probe_arg *arg; - u32 end = tp->size; - u32 *dl; /* Data (relative) location */ - int i; + void *base = data - header_size; + void *dyndata = data + tp->size; + u32 *dl; /* Data location */ + int ret, i; for (i = 0; i < tp->nr_args; i++) { arg = tp->args + i; - if (unlikely(arg->dynamic)) { - /* - * First, we set the relative location and - * maximum data length to *dl - */ - dl = (u32 *)(data + arg->offset); - *dl = make_data_rloc(maxlen, end - arg->offset); - /* Then try to fetch string or dynamic array data */ - process_fetch_insn(arg->code, regs, dl, false); - /* Reduce maximum length */ - end += get_rloc_len(*dl); - maxlen -= get_rloc_len(*dl); - /* Trick here, convert data_rloc to data_loc */ - *dl = convert_rloc_to_loc(*dl, ent_size + arg->offset); - } else - /* Just fetching data normally */ - process_fetch_insn(arg->code, regs, data + arg->offset, - false); + dl = data + arg->offset; + /* Point the dynamic data area if needed */ + if (unlikely(arg->dynamic)) + *dl = make_data_loc(maxlen, dyndata - base); + ret = process_fetch_insn(arg->code, regs, dl, base); + if (unlikely(ret < 0 && arg->dynamic)) + *dl = make_data_loc(0, dyndata - base); + else + dyndata += ret; } } diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 7772fec84c12..08ad51c8ebc0 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -111,43 +111,38 @@ probe_user_read(void *dest, void *src, size_t size) * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max * length and relative data location. */ -static nokprobe_inline void -fetch_store_string(unsigned long addr, void *dest) +static nokprobe_inline int +fetch_store_string(unsigned long addr, void *dest, void *base) { long ret; - u32 rloc = *(u32 *)dest; - int maxlen = get_rloc_len(rloc); - u8 *dst = get_rloc_data(dest); + u32 loc = *(u32 *)dest; + int maxlen = get_loc_len(loc); + u8 *dst = get_loc_data(dest, base); void __user *src = (void __force __user *) addr; - if (!maxlen) - return; + if (unlikely(!maxlen)) + return -ENOMEM; ret = strncpy_from_user(dst, src, maxlen); - if (ret == maxlen) - dst[--ret] = '\0'; - - if (ret < 0) { /* Failed to fetch string */ - ((u8 *)get_rloc_data(dest))[0] = '\0'; - *(u32 *)dest = make_data_rloc(0, get_rloc_offs(rloc)); - } else { - *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(rloc)); + if (ret >= 0) { + if (ret == maxlen) + dst[ret - 1] = '\0'; + *(u32 *)dest = make_data_loc(ret, (void *)dst - base); } + + return ret; } /* Return the length of string -- including null terminal byte */ -static nokprobe_inline void -fetch_store_strlen(unsigned long addr, void *dest) +static nokprobe_inline int +fetch_store_strlen(unsigned long addr) { int len; void __user *vaddr = (void __force __user *) addr; len = strnlen_user(vaddr, MAX_STRING_SIZE); - if (len == 0 || len > MAX_STRING_SIZE) /* Failed to check length */ - *(u32 *)dest = 0; - else - *(u32 *)dest = len; + return (len > MAX_STRING_SIZE) ? 0 : len; } static unsigned long translate_user_vaddr(unsigned long file_offset) @@ -164,10 +159,10 @@ static unsigned long translate_user_vaddr(unsigned long file_offset) /* Note that we don't verify it, since the code does not come from user space */ static int process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, - bool pre) + void *base) { unsigned long val; - int ret; + int ret = 0; /* 1st stage: get value from context */ switch (code->op) { @@ -204,18 +199,22 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, } /* 3rd stage: store value to buffer */ + if (unlikely(!dest)) { + if (code->op == FETCH_OP_ST_STRING) + return fetch_store_strlen(val + code->offset); + else + return -EILSEQ; + } + switch (code->op) { case FETCH_OP_ST_RAW: fetch_store_raw(val, code, dest); break; case FETCH_OP_ST_MEM: - probe_user_read(dest, (void *)val + code->offset, code->size); + probe_kernel_read(dest, (void *)val + code->offset, code->size); break; case FETCH_OP_ST_STRING: - if (pre) - fetch_store_strlen(val + code->offset, dest); - else - fetch_store_string(val + code->offset, dest); + ret = fetch_store_string(val + code->offset, dest, base); break; default: return -EILSEQ; @@ -228,7 +227,7 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, code++; } - return code->op == FETCH_OP_END ? 0 : -EILSEQ; + return code->op == FETCH_OP_END ? ret : -EILSEQ; } NOKPROBE_SYMBOL(process_fetch_insn) @@ -1300,7 +1299,7 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); ucb = uprobe_buffer_get(); - store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize); + store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); if (tu->tp.flags & TP_FLAG_TRACE) ret |= uprobe_trace_func(tu, regs, ucb, dsize); @@ -1335,7 +1334,7 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); ucb = uprobe_buffer_get(); - store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize); + store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); if (tu->tp.flags & TP_FLAG_TRACE) uretprobe_trace_func(tu, func, regs, ucb, dsize); -- cgit v1.2.3 From 0a46c8549f8c775ed6afac57a8b9fd7c4b4d156f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:19:30 +0900 Subject: tracing: probeevent: Append traceprobe_ for exported function Append traceprobe_ for exported function set_print_fmt() as same as other functions. Link: http://lkml.kernel.org/r/152465877071.26224.11143125027282999726.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 4 ++-- kernel/trace/trace_probe.c | 2 +- kernel/trace/trace_probe.h | 2 +- kernel/trace/trace_uprobe.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7e5064f8ab8f..4895ca85ec79 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1371,7 +1371,7 @@ static int register_kprobe_event(struct trace_kprobe *tk) init_trace_event_call(tk, call); - if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) + if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) return -ENOMEM; ret = register_trace_event(&call->event); if (!ret) { @@ -1428,7 +1428,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs, init_trace_event_call(tk, &tk->tp.call); - if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) { + if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) { ret = -ENOMEM; goto error; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index d06e67cca3e1..d119bf8c3b4f 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -490,7 +490,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, return pos; } -int set_print_fmt(struct trace_probe *tp, bool is_return) +int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return) { int len; char *print_fmt; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index b6bdd82fa485..c4e9d3d3216d 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -254,7 +254,7 @@ extern void traceprobe_free_probe_arg(struct probe_arg *arg); extern int traceprobe_split_symbol_offset(char *symbol, long *offset); -extern int set_print_fmt(struct trace_probe *tp, bool is_return); +extern int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return); #ifdef CONFIG_PERF_EVENTS extern struct trace_event_call * diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 08ad51c8ebc0..912cb2093944 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1370,7 +1370,7 @@ static int register_uprobe_event(struct trace_uprobe *tu) init_trace_event_call(tu, call); - if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) + if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) return -ENOMEM; ret = register_trace_event(&call->event); @@ -1443,7 +1443,7 @@ create_local_trace_uprobe(char *name, unsigned long offs, tu->filename = kstrdup(name, GFP_KERNEL); init_trace_event_call(tu, &tu->tp.call); - if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) { + if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) { ret = -ENOMEM; goto error; } -- cgit v1.2.3 From 9b960a38835fcaf977f20dcc34ce9e54ff9563bd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:19:59 +0900 Subject: tracing: probeevent: Unify fetch_insn processing common part Unify the fetch_insn bottom process (from stage 2: dereference indirect data) from kprobe and uprobe events, since those are mostly same. Link: http://lkml.kernel.org/r/152465879965.26224.8547240824606804815.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 47 ++++++----------------------------- kernel/trace/trace_probe_tmpl.h | 55 ++++++++++++++++++++++++++++++++++++++++- kernel/trace/trace_uprobe.c | 43 ++------------------------------ 3 files changed, 63 insertions(+), 82 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 4895ca85ec79..fdd43f2f1fd1 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -899,13 +899,18 @@ fetch_store_string(unsigned long addr, void *dest, void *base) return ret; } +static nokprobe_inline int +probe_mem_read(void *dest, void *src, size_t size) +{ + return probe_kernel_read(dest, src, size); +} + /* Note that we don't verify it, since the code does not come from user space */ static int process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, void *base) { unsigned long val; - int ret = 0; /* 1st stage: get value from context */ switch (code->op) { @@ -932,45 +937,7 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, } code++; - /* 2nd stage: dereference memory if needed */ - while (code->op == FETCH_OP_DEREF) { - ret = probe_kernel_read(&val, (void *)val + code->offset, - sizeof(val)); - if (ret) - return ret; - code++; - } - - /* 3rd stage: store value to buffer */ - if (unlikely(!dest)) { - if (code->op == FETCH_OP_ST_STRING) - return fetch_store_strlen(val + code->offset); - else - return -EILSEQ; - } - - switch (code->op) { - case FETCH_OP_ST_RAW: - fetch_store_raw(val, code, dest); - break; - case FETCH_OP_ST_MEM: - probe_kernel_read(dest, (void *)val + code->offset, code->size); - break; - case FETCH_OP_ST_STRING: - ret = fetch_store_string(val + code->offset, dest, base); - break; - default: - return -EILSEQ; - } - code++; - - /* 4th stage: modify stored value if needed */ - if (code->op == FETCH_OP_MOD_BF) { - fetch_apply_bitfield(code, dest); - code++; - } - - return code->op == FETCH_OP_END ? ret : -EILSEQ; + return process_fetch_insn_bottom(code, val, dest, base); } NOKPROBE_SYMBOL(process_fetch_insn) diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index 3b4aba6f84cc..b4075f3e3a29 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -49,13 +49,66 @@ fetch_apply_bitfield(struct fetch_insn *code, void *buf) } /* - * This must be defined for each callsite. + * These functions must be defined for each callsite. * Return consumed dynamic data size (>= 0), or error (< 0). * If dest is NULL, don't store result and return required dynamic data size. */ static int process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, void *base); +static nokprobe_inline int fetch_store_strlen(unsigned long addr); +static nokprobe_inline int +fetch_store_string(unsigned long addr, void *dest, void *base); +static nokprobe_inline int +probe_mem_read(void *dest, void *src, size_t size); + +/* From the 2nd stage, routine is same */ +static nokprobe_inline int +process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, + void *dest, void *base) +{ + int ret = 0; + + /* 2nd stage: dereference memory if needed */ + while (code->op == FETCH_OP_DEREF) { + ret = probe_mem_read(&val, (void *)val + code->offset, + sizeof(val)); + if (ret) + return ret; + code++; + } + + /* 3rd stage: store value to buffer */ + if (unlikely(!dest)) { + if (code->op == FETCH_OP_ST_STRING) + return fetch_store_strlen(val + code->offset); + else + return -EILSEQ; + } + + switch (code->op) { + case FETCH_OP_ST_RAW: + fetch_store_raw(val, code, dest); + break; + case FETCH_OP_ST_MEM: + probe_mem_read(dest, (void *)val + code->offset, code->size); + break; + case FETCH_OP_ST_STRING: + ret = fetch_store_string(val + code->offset, dest, base); + break; + default: + return -EILSEQ; + } + code++; + + /* 4th stage: modify stored value if needed */ + if (code->op == FETCH_OP_MOD_BF) { + fetch_apply_bitfield(code, dest); + code++; + } + + return code->op == FETCH_OP_END ? ret : -EILSEQ; +} /* Sum up total data length for dynamic arraies (strings) */ static nokprobe_inline int diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 912cb2093944..7154473ffaa4 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -101,7 +101,7 @@ static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n) * Uprobes-specific fetch functions */ static nokprobe_inline int -probe_user_read(void *dest, void *src, size_t size) +probe_mem_read(void *dest, void *src, size_t size) { void __user *vaddr = (void __force __user *)src; @@ -162,7 +162,6 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, void *base) { unsigned long val; - int ret = 0; /* 1st stage: get value from context */ switch (code->op) { @@ -189,45 +188,7 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, } code++; - /* 2nd stage: dereference memory if needed */ - while (code->op == FETCH_OP_DEREF) { - ret = probe_user_read(&val, (void *)val + code->offset, - sizeof(val)); - if (ret) - return ret; - code++; - } - - /* 3rd stage: store value to buffer */ - if (unlikely(!dest)) { - if (code->op == FETCH_OP_ST_STRING) - return fetch_store_strlen(val + code->offset); - else - return -EILSEQ; - } - - switch (code->op) { - case FETCH_OP_ST_RAW: - fetch_store_raw(val, code, dest); - break; - case FETCH_OP_ST_MEM: - probe_kernel_read(dest, (void *)val + code->offset, code->size); - break; - case FETCH_OP_ST_STRING: - ret = fetch_store_string(val + code->offset, dest, base); - break; - default: - return -EILSEQ; - } - code++; - - /* 4th stage: modify stored value if needed */ - if (code->op == FETCH_OP_MOD_BF) { - fetch_apply_bitfield(code, dest); - code++; - } - - return code->op == FETCH_OP_END ? ret : -EILSEQ; + return process_fetch_insn_bottom(code, val, dest, base); } NOKPROBE_SYMBOL(process_fetch_insn) -- cgit v1.2.3 From 60c2e0cebfd01bd1bc5e8843f063264148d6b2bb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:20:28 +0900 Subject: tracing: probeevent: Add symbol type Add "symbol" type to probeevent, which is an alias of u32 or u64 (depends on BITS_PER_LONG). This shows the result value in symbol+offset style. This type is only available with kprobe events. Link: http://lkml.kernel.org/r/152465882860.26224.14779072294412467338.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/kprobetrace.rst | 2 ++ kernel/trace/trace.c | 2 +- kernel/trace/trace_probe.c | 8 ++++++++ kernel/trace/trace_probe.h | 12 +++++++++--- 4 files changed, 20 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 8bfc75c90806..6224ddf34508 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -72,6 +72,8 @@ offset, and container-size (usually 32). The syntax is:: b@/ +Symbol type('symbol') is an alias of u32 or u64 type (depends on BITS_PER_LONG) +which shows given pointer in "symbol+offset" style. For $comm, the default type is "string"; any other type is invalid. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 147be8523560..1e3f28b1fa07 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4626,7 +4626,7 @@ static const char readme_msg[] = "\t args: =fetcharg[:type]\n" "\t fetcharg: %, @
, @[+|-],\n" "\t $stack, $stack, $retval, $comm\n" - "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n" + "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" "\t b@/\n" #endif " events/\t\t- Directory containing all trace event subsystems:\n" diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index d119bf8c3b4f..1e7e0618577d 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -46,6 +46,13 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(x16, u16, "0x%x") DEFINE_BASIC_PRINT_TYPE_FUNC(x32, u32, "0x%x") DEFINE_BASIC_PRINT_TYPE_FUNC(x64, u64, "0x%Lx") +int PRINT_TYPE_FUNC_NAME(symbol)(struct trace_seq *s, void *data, void *ent) +{ + trace_seq_printf(s, "%pS", (void *)*(unsigned long *)data); + return !trace_seq_has_overflowed(s); +} +const char PRINT_TYPE_FMT_NAME(symbol)[] = "%pS"; + /* Print type function for string type */ int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent) { @@ -79,6 +86,7 @@ static const struct fetch_type probe_fetch_types[] = { ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0), ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0), ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0), + ASSIGN_FETCH_TYPE_ALIAS(symbol, ADDR_FETCH_TYPE, ADDR_FETCH_TYPE, 0), ASSIGN_FETCH_TYPE_END }; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index c4e9d3d3216d..469110e0790b 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -145,6 +145,7 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(x32); DECLARE_BASIC_PRINT_TYPE_FUNC(x64); DECLARE_BASIC_PRINT_TYPE_FUNC(string); +DECLARE_BASIC_PRINT_TYPE_FUNC(symbol); /* Default (unsigned long) fetch type */ #define __DEFAULT_FETCH_TYPE(t) x##t @@ -152,6 +153,10 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(string); #define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) #define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) +#define __ADDR_FETCH_TYPE(t) u##t +#define _ADDR_FETCH_TYPE(t) __ADDR_FETCH_TYPE(t) +#define ADDR_FETCH_TYPE _ADDR_FETCH_TYPE(BITS_PER_LONG) + #define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \ {.name = _name, \ .size = _size, \ @@ -160,13 +165,14 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(string); .fmt = PRINT_TYPE_FMT_NAME(ptype), \ .fmttype = _fmttype, \ } - +#define _ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \ + __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, #_fmttype) #define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \ - __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype) + _ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, ptype) /* If ptype is an alias of atype, use this macro (show atype in format) */ #define ASSIGN_FETCH_TYPE_ALIAS(ptype, atype, ftype, sign) \ - __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #atype) + _ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, atype) #define ASSIGN_FETCH_TYPE_END {} -- cgit v1.2.3 From 40b53b771806b1770837169cd32d1bf167fbccaf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:21:55 +0900 Subject: tracing: probeevent: Add array type support Add array type support for probe events. This allows user to get arraied types from memory address. The array type syntax is TYPE[N] Where TYPE is one of types (u8/16/32/64,s8/16/32/64, x8/16/32/64, symbol, string) and N is a fixed value less than 64. The string array type is a bit different from other types. For other base types, [1] is equal to (e.g. +0(%di):x32[1] is same as +0(%di):x32.) But string[1] is not equal to string. The string type itself represents "char array", but string array type represents "char * array". So, for example, +0(%di):string[1] is equal to +0(+0(%di)):string. Link: http://lkml.kernel.org/r/152465891533.26224.6150658225601339931.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/kprobetrace.rst | 11 +++ kernel/trace/trace.c | 3 +- kernel/trace/trace_probe.c | 130 +++++++++++++++++++++++++++--------- kernel/trace/trace_probe.h | 14 ++++ kernel/trace/trace_probe_tmpl.h | 63 ++++++++++++++--- 5 files changed, 181 insertions(+), 40 deletions(-) (limited to 'kernel') diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 6224ddf34508..2dfed7a1ea6f 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -64,9 +64,20 @@ respectively. 'x' prefix implies it is unsigned. Traced arguments are shown in decimal ('s' and 'u') or hexadecimal ('x'). Without type casting, 'x32' or 'x64' is used depends on the architecture (e.g. x86-32 uses x32, and x86-64 uses x64). +These value types can be an array. To record array data, you can add '[N]' +(where N is a fixed number, less than 64) to the base type. +E.g. 'x16[4]' means an array of x16 (2bytes hex) with 4 elements. +Note that the array can be applied to memory type fetchargs, you can not +apply it to registers/stack-entries etc. (for example, '$stack1:x8[8]' is +wrong, but '+8($stack):x8[8]' is OK.) String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. +The string array type is a bit different from other types. For other base +types, [1] is equal to (e.g. +0(%di):x32[1] is same +as +0(%di):x32.) But string[1] is not equal to string. The string type itself +represents "char array", but string array type represents "char * array". +So, for example, +0(%di):string[1] is equal to +0(+0(%di)):string. Bitfield is another special type, which takes 3 parameters, bit-width, bit- offset, and container-size (usually 32). The syntax is:: diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1e3f28b1fa07..e7f99f513959 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4627,7 +4627,8 @@ static const char readme_msg[] = "\t fetcharg: %, @
, @[+|-],\n" "\t $stack, $stack, $retval, $comm\n" "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" - "\t b@/\n" + "\t b@/,\n" + "\t \\[\\]\n" #endif " events/\t\t- Directory containing all trace event subsystems:\n" " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 1e7e0618577d..dfd096031305 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -341,9 +341,9 @@ static int __parse_bitfield_probe_arg(const char *bf, int traceprobe_parse_probe_arg(char *arg, ssize_t *size, struct probe_arg *parg, bool is_return, bool is_kprobe) { - struct fetch_insn *code, *tmp = NULL; - const char *t; - int ret; + struct fetch_insn *code, *scode, *tmp = NULL; + char *t, *t2; + int ret, len; if (strlen(arg) > MAX_ARGSTR_LEN) { pr_info("Argument is too long.: %s\n", arg); @@ -354,24 +354,42 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, pr_info("Failed to allocate memory for command '%s'.\n", arg); return -ENOMEM; } - t = strchr(parg->comm, ':'); + t = strchr(arg, ':'); if (t) { - arg[t - parg->comm] = '\0'; - t++; + *t = '\0'; + t2 = strchr(++t, '['); + if (t2) { + *t2 = '\0'; + parg->count = simple_strtoul(t2 + 1, &t2, 0); + if (strcmp(t2, "]") || parg->count == 0) + return -EINVAL; + if (parg->count > MAX_ARRAY_LEN) + return -E2BIG; + } } /* * The default type of $comm should be "string", and it can't be * dereferenced. */ if (!t && strcmp(arg, "$comm") == 0) - t = "string"; - parg->type = find_fetch_type(t); + parg->type = find_fetch_type("string"); + else + parg->type = find_fetch_type(t); if (!parg->type) { pr_info("Unsupported type: %s\n", t); return -EINVAL; } parg->offset = *size; - *size += parg->type->size; + *size += parg->type->size * (parg->count ?: 1); + + if (parg->count) { + len = strlen(parg->type->fmttype) + 6; + parg->fmt = kmalloc(len, GFP_KERNEL); + if (!parg->fmt) + return -ENOMEM; + snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype, + parg->count); + } code = tmp = kzalloc(sizeof(*code) * FETCH_INSN_MAX, GFP_KERNEL); if (!code) @@ -391,10 +409,20 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, ret = -EINVAL; goto fail; } - /* Since IMM or COMM must be the 1st insn, this is safe */ - if (code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) + if (code->op != FETCH_OP_DEREF || parg->count) { + /* + * IMM and COMM is pointing actual address, those must + * be kept, and if parg->count != 0, this is an array + * of string pointers instead of string address itself. + */ code++; + if (code->op != FETCH_OP_NOP) { + ret = -E2BIG; + goto fail; + } + } code->op = FETCH_OP_ST_STRING; /* In DEREF case, replace it */ + code->size = parg->type->size; parg->dynamic = true; } else if (code->op == FETCH_OP_DEREF) { code->op = FETCH_OP_ST_MEM; @@ -408,12 +436,29 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, code->op = FETCH_OP_ST_RAW; code->size = parg->type->size; } + scode = code; /* Modify operation */ if (t != NULL) { ret = __parse_bitfield_probe_arg(t, parg->type, &code); if (ret) goto fail; } + /* Loop(Array) operation */ + if (parg->count) { + if (scode->op != FETCH_OP_ST_MEM && + scode->op != FETCH_OP_ST_STRING) { + pr_info("array only accepts memory or address\n"); + ret = -EINVAL; + goto fail; + } + code++; + if (code->op != FETCH_OP_NOP) { + ret = -E2BIG; + goto fail; + } + code->op = FETCH_OP_LP_ARRAY; + code->param = parg->count; + } code++; code->op = FETCH_OP_END; @@ -452,14 +497,17 @@ void traceprobe_free_probe_arg(struct probe_arg *arg) kfree(arg->code); kfree(arg->name); kfree(arg->comm); + kfree(arg->fmt); } +/* When len=0, we just calculate the needed length */ +#define LEN_OR_ZERO (len ? len - pos : 0) static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, bool is_return) { - int i; + struct probe_arg *parg; + int i, j; int pos = 0; - const char *fmt, *arg; if (!is_return) { @@ -470,33 +518,49 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; } - /* When len=0, we just calculate the needed length */ -#define LEN_OR_ZERO (len ? len - pos : 0) - pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); for (i = 0; i < tp->nr_args; i++) { - pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", - tp->args[i].name, tp->args[i].type->fmt); + parg = tp->args + i; + pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=", parg->name); + if (parg->count) { + pos += snprintf(buf + pos, LEN_OR_ZERO, "{%s", + parg->type->fmt); + for (j = 1; j < parg->count; j++) + pos += snprintf(buf + pos, LEN_OR_ZERO, ",%s", + parg->type->fmt); + pos += snprintf(buf + pos, LEN_OR_ZERO, "}"); + } else + pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", + parg->type->fmt); } pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); for (i = 0; i < tp->nr_args; i++) { - if (strcmp(tp->args[i].type->name, "string") == 0) + parg = tp->args + i; + if (parg->count) { + if (strcmp(parg->type->name, "string") == 0) + fmt = ", __get_str(%s[%d])"; + else + fmt = ", REC->%s[%d]"; + for (j = 0; j < parg->count; j++) + pos += snprintf(buf + pos, LEN_OR_ZERO, + fmt, parg->name, j); + } else { + if (strcmp(parg->type->name, "string") == 0) + fmt = ", __get_str(%s)"; + else + fmt = ", REC->%s"; pos += snprintf(buf + pos, LEN_OR_ZERO, - ", __get_str(%s)", - tp->args[i].name); - else - pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", - tp->args[i].name); + fmt, parg->name); + } } -#undef LEN_OR_ZERO - /* return the length of print_fmt */ return pos; } +#undef LEN_OR_ZERO int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return) { @@ -524,11 +588,15 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call, /* Set argument names as fields */ for (i = 0; i < tp->nr_args; i++) { struct probe_arg *parg = &tp->args[i]; - - ret = trace_define_field(event_call, parg->type->fmttype, - parg->name, - offset + parg->offset, - parg->type->size, + const char *fmt = parg->type->fmttype; + int size = parg->type->size; + + if (parg->fmt) + fmt = parg->fmt; + if (parg->count) + size *= parg->count; + ret = trace_define_field(event_call, fmt, parg->name, + offset + parg->offset, size, parg->type->is_signed, FILTER_OTHER); if (ret) diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 469110e0790b..1f456fd82483 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -30,6 +30,7 @@ #define MAX_TRACE_ARGS 128 #define MAX_ARGSTR_LEN 63 +#define MAX_ARRAY_LEN 64 #define MAX_STRING_SIZE PATH_MAX /* Reserved field names */ @@ -65,6 +66,14 @@ static nokprobe_inline void *get_loc_data(u32 *dl, void *ent) return (u8 *)ent + get_loc_offs(*dl); } +static nokprobe_inline u32 update_data_loc(u32 loc, int consumed) +{ + u32 maxlen = get_loc_len(loc); + u32 offset = get_loc_offs(loc); + + return make_data_loc(maxlen - consumed, offset + consumed); +} + /* Printing function type */ typedef int (*print_type_func_t)(struct trace_seq *, void *, void *); @@ -86,6 +95,8 @@ enum fetch_op { FETCH_OP_ST_STRING, /* String: .offset, .size */ // Stage 4 (modify) op FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */ + // Stage 5 (loop) op + FETCH_OP_LP_ARRAY, /* Array: .param = loop count */ FETCH_OP_END, }; @@ -175,6 +186,7 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(symbol); _ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, atype) #define ASSIGN_FETCH_TYPE_END {} +#define MAX_ARRAY_LEN 64 #ifdef CONFIG_KPROBE_EVENTS bool trace_kprobe_on_func_entry(struct trace_event_call *call); @@ -195,8 +207,10 @@ struct probe_arg { struct fetch_insn *code; bool dynamic;/* Dynamic array (string) is used */ unsigned int offset; /* Offset from argument entry */ + unsigned int count; /* Array count */ const char *name; /* Name of this argument */ const char *comm; /* Command of this argument */ + char *fmt; /* Format string if needed */ const struct fetch_type *type; /* Type of this argument */ }; diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index b4075f3e3a29..5c56afc17cf8 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -67,10 +67,15 @@ static nokprobe_inline int process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, void *dest, void *base) { - int ret = 0; + struct fetch_insn *s3 = NULL; + int total = 0, ret = 0, i = 0; + u32 loc = 0; + unsigned long lval = val; +stage2: /* 2nd stage: dereference memory if needed */ while (code->op == FETCH_OP_DEREF) { + lval = val; ret = probe_mem_read(&val, (void *)val + code->offset, sizeof(val)); if (ret) @@ -78,11 +83,15 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, code++; } + s3 = code; +stage3: /* 3rd stage: store value to buffer */ if (unlikely(!dest)) { - if (code->op == FETCH_OP_ST_STRING) - return fetch_store_strlen(val + code->offset); - else + if (code->op == FETCH_OP_ST_STRING) { + ret += fetch_store_strlen(val + code->offset); + code++; + goto array; + } else return -EILSEQ; } @@ -94,6 +103,7 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, probe_mem_read(dest, (void *)val + code->offset, code->size); break; case FETCH_OP_ST_STRING: + loc = *(u32 *)dest; ret = fetch_store_string(val + code->offset, dest, base); break; default: @@ -107,6 +117,29 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, code++; } +array: + /* the last stage: Loop on array */ + if (code->op == FETCH_OP_LP_ARRAY) { + total += ret; + if (++i < code->param) { + code = s3; + if (s3->op != FETCH_OP_ST_STRING) { + dest += s3->size; + val += s3->size; + goto stage3; + } + code--; + val = lval + sizeof(char *); + if (dest) { + dest += sizeof(u32); + *(u32 *)dest = update_data_loc(loc, ret); + } + goto stage2; + } + code++; + ret = total; + } + return code->op == FETCH_OP_END ? ret : -EILSEQ; } @@ -158,12 +191,26 @@ static inline int print_probe_args(struct trace_seq *s, struct probe_arg *args, int nr_args, u8 *data, void *field) { - int i; + void *p; + int i, j; for (i = 0; i < nr_args; i++) { - trace_seq_printf(s, " %s=", args[i].name); - if (!args[i].type->print(s, data + args[i].offset, field)) - return -ENOMEM; + struct probe_arg *a = args + i; + + trace_seq_printf(s, " %s=", a->name); + if (likely(!a->count)) { + if (!a->type->print(s, data + a->offset, field)) + return -ENOMEM; + continue; + } + trace_seq_putc(s, '{'); + p = data + a->offset; + for (j = 0; j < a->count; j++) { + if (!a->type->print(s, p, field)) + return -ENOMEM; + trace_seq_putc(s, j == a->count - 1 ? '}' : ','); + p += a->type->size; + } } return 0; } -- cgit v1.2.3 From a1303af5d79eb13a658633a9fb0ce3aed0f7decf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:21:26 +0900 Subject: tracing: probeevent: Add $argN for accessing function args Add $argN special fetch variable for accessing function arguments. This allows user to trace the Nth argument easily at the function entry. Note that this returns most probably assignment of registers and stacks. In some case, it may not work well. If you need to access correct registers or stacks you should use perf-probe. Link: http://lkml.kernel.org/r/152465888632.26224.3412465701570253696.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/kprobetrace.rst | 10 ++++++---- kernel/trace/trace.c | 4 ++++ kernel/trace/trace_kprobe.c | 18 +++++++++++++----- kernel/trace/trace_probe.c | 36 +++++++++++++++++++++++------------- kernel/trace/trace_probe.h | 9 ++++++++- kernel/trace/trace_uprobe.c | 2 +- 6 files changed, 55 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 2dfed7a1ea6f..47e765c2f2c3 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -45,16 +45,18 @@ Synopsis of kprobe_events @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) $stackN : Fetch Nth entry of stack (N >= 0) $stack : Fetch stack address. - $retval : Fetch return value.(*) + $argN : Fetch the Nth function argument. (N >= 1) (\*1) + $retval : Fetch return value.(\*2) $comm : Fetch current task comm. - +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) + +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(\*3) NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types (x8/x16/x32/x64), "string" and bitfield are supported. - (*) only for return probe. - (**) this is useful for fetching a field of data structures. + (\*1) only for the probe on function entry (offs == 0). + (\*2) only for return probe. + (\*3) this is useful for fetching a field of data structures. Types ----- diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e7f99f513959..ec5b21778806 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4625,7 +4625,11 @@ static const char readme_msg[] = #endif "\t args: =fetcharg[:type]\n" "\t fetcharg: %, @
, @[+|-],\n" +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API + "\t $stack, $stack, $retval, $comm, $arg\n" +#else "\t $stack, $stack, $retval, $comm\n" +#endif "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" "\t b@/,\n" "\t \\[\\]\n" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index fdd43f2f1fd1..3faaadbddf54 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -533,13 +533,15 @@ static int create_trace_kprobe(int argc, char **argv) long offset = 0; void *addr = NULL; char buf[MAX_EVENT_NAME_LEN]; + unsigned int flags = TPARG_FL_KERNEL; /* argc must be >= 1 */ if (argv[0][0] == 'p') is_return = false; - else if (argv[0][0] == 'r') + else if (argv[0][0] == 'r') { is_return = true; - else if (argv[0][0] == '-') + flags |= TPARG_FL_RETURN; + } else if (argv[0][0] == '-') is_delete = true; else { pr_info("Probe definition must be started with 'p', 'r' or" @@ -625,8 +627,9 @@ static int create_trace_kprobe(int argc, char **argv) pr_info("Failed to parse either an address or a symbol.\n"); return ret; } - if (offset && is_return && - !kprobe_on_func_entry(NULL, symbol, offset)) { + if (kprobe_on_func_entry(NULL, symbol, offset)) + flags |= TPARG_FL_FENTRY; + if (offset && is_return && !(flags & TPARG_FL_FENTRY)) { pr_info("Given offset is not valid for return probe.\n"); return -EINVAL; } @@ -696,7 +699,7 @@ static int create_trace_kprobe(int argc, char **argv) /* Parse fetch argument */ ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg, - is_return, true); + flags); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; @@ -932,6 +935,11 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, case FETCH_OP_COMM: val = (unsigned long)current->comm; break; +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API + case FETCH_OP_ARG: + val = regs_get_kernel_argument(regs, code->param); + break; +#endif default: return -EILSEQ; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index dfd096031305..333cda6d2633 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -157,14 +157,13 @@ int traceprobe_split_symbol_offset(char *symbol, long *offset) #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) static int parse_probe_vars(char *arg, const struct fetch_type *t, - struct fetch_insn *code, bool is_return, - bool is_kprobe) + struct fetch_insn *code, unsigned int flags) { int ret = 0; unsigned long param; if (strcmp(arg, "retval") == 0) { - if (is_return) + if (flags & TPARG_FL_RETURN) code->op = FETCH_OP_RETVAL; else ret = -EINVAL; @@ -173,7 +172,8 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, code->op = FETCH_OP_STACKP; } else if (isdigit(arg[5])) { ret = kstrtoul(arg + 5, 10, ¶m); - if (ret || (is_kprobe && param > PARAM_MAX_STACK)) + if (ret || ((flags & TPARG_FL_KERNEL) && + param > PARAM_MAX_STACK)) ret = -EINVAL; else { code->op = FETCH_OP_STACK; @@ -183,6 +183,18 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, ret = -EINVAL; } else if (strcmp(arg, "comm") == 0) { code->op = FETCH_OP_COMM; +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API + } else if (((flags & TPARG_FL_MASK) == + (TPARG_FL_KERNEL | TPARG_FL_FENTRY)) && + strncmp(arg, "arg", 3) == 0) { + if (!isdigit(arg[3])) + return -EINVAL; + ret = kstrtoul(arg + 3, 10, ¶m); + if (ret || !param || param > PARAM_MAX_STACK) + return -EINVAL; + code->op = FETCH_OP_ARG; + code->param = (unsigned int)param - 1; +#endif } else ret = -EINVAL; @@ -193,7 +205,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, static int parse_probe_arg(char *arg, const struct fetch_type *type, struct fetch_insn **pcode, struct fetch_insn *end, - bool is_return, bool is_kprobe) + unsigned int flags) { struct fetch_insn *code = *pcode; unsigned long param; @@ -203,8 +215,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, switch (arg[0]) { case '$': - ret = parse_probe_vars(arg + 1, type, code, - is_return, is_kprobe); + ret = parse_probe_vars(arg + 1, type, code, flags); break; case '%': /* named register */ @@ -226,7 +237,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, code->immediate = param; } else if (arg[1] == '+') { /* kprobes don't support file offsets */ - if (is_kprobe) + if (flags & TPARG_FL_KERNEL) return -EINVAL; ret = kstrtol(arg + 2, 0, &offset); @@ -237,7 +248,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, code->immediate = (unsigned long)offset; // imm64? } else { /* uprobes don't support symbols */ - if (!is_kprobe) + if (!(flags & TPARG_FL_KERNEL)) return -EINVAL; ret = traceprobe_split_symbol_offset(arg + 1, &offset); @@ -278,8 +289,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, const struct fetch_type *t2 = find_fetch_type(NULL); *tmp = '\0'; - ret = parse_probe_arg(arg, t2, &code, end, is_return, - is_kprobe); + ret = parse_probe_arg(arg, t2, &code, end, flags); if (ret) break; if (code->op == FETCH_OP_COMM) @@ -339,7 +349,7 @@ static int __parse_bitfield_probe_arg(const char *bf, /* String length checking wrapper */ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, - struct probe_arg *parg, bool is_return, bool is_kprobe) + struct probe_arg *parg, unsigned int flags) { struct fetch_insn *code, *scode, *tmp = NULL; char *t, *t2; @@ -397,7 +407,7 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, code[FETCH_INSN_MAX - 1].op = FETCH_OP_END; ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1], - is_return, is_kprobe); + flags); if (ret) goto fail; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 1f456fd82483..09f62171cc23 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "trace.h" @@ -86,6 +87,7 @@ enum fetch_op { FETCH_OP_RETVAL, /* Return value */ FETCH_OP_IMM, /* Immediate : .immediate */ FETCH_OP_COMM, /* Current comm */ + FETCH_OP_ARG, /* Function argument : .param */ FETCH_OP_FOFFS, /* File offset: .immediate */ // Stage 2 (dereference) op FETCH_OP_DEREF, /* Dereference: .offset */ @@ -263,8 +265,13 @@ find_event_file_link(struct trace_probe *tp, struct trace_event_file *file) return NULL; } +#define TPARG_FL_RETURN BIT(0) +#define TPARG_FL_KERNEL BIT(1) +#define TPARG_FL_FENTRY BIT(2) +#define TPARG_FL_MASK GENMASK(2, 0) + extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size, - struct probe_arg *parg, bool is_return, bool is_kprobe); + struct probe_arg *parg, unsigned int flags); extern int traceprobe_conflict_field_name(const char *name, struct probe_arg *args, int narg); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 7154473ffaa4..394b93572506 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -557,7 +557,7 @@ static int create_trace_uprobe(int argc, char **argv) /* Parse fetch argument */ ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg, - is_return, false); + is_return ? TPARG_FL_RETURN : 0); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; -- cgit v1.2.3 From f3f58935edbcb33fd529fc46d554162a0660fd2d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 29 Aug 2018 01:17:47 +0900 Subject: tracing/uprobes: Fix to return -EFAULT if copy_from_user failed Fix probe_mem_read() to return -EFAULT if copy_from_user() failed. The copy_from_user() returns remaining bytes when it failed, but probe_mem_read() caller expects it returns error code like as probe_kernel_read(). Link: http://lkml.kernel.org/r/153547306719.26502.8353484532699160223.stgit@devbox Reported-by: Dan Carpenter Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_uprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 394b93572506..31ea48eceda1 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -105,7 +105,7 @@ probe_mem_read(void *dest, void *src, size_t size) { void __user *vaddr = (void __force __user *)src; - return copy_from_user(dest, vaddr, size); + return copy_from_user(dest, vaddr, size) ? -EFAULT : 0; } /* * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max -- cgit v1.2.3 From 59158ec4aef7d44be51a6f3e7e17fc64c32604eb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 29 Aug 2018 01:18:15 +0900 Subject: tracing/kprobes: Check the probe on unloaded module correctly Current kprobe event doesn't checks correctly whether the given event is on unloaded module or not. It just checks the event has ":" in the name. That is not enough because if we define a probe on non-exist symbol on loaded module, it allows to define that (with warning message) To ensure it correctly, this searches the module name on loaded module list and only if there is not, it allows to define it. (this event will be available when the target module is loaded) Link: http://lkml.kernel.org/r/153547309528.26502.8300278470528281328.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3faaadbddf54..4727a13824f0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -62,9 +62,23 @@ static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk, return strncmp(mod->name, name, len) == 0 && name[len] == ':'; } -static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk) +static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk) { - return !!strchr(trace_kprobe_symbol(tk), ':'); + char *p; + bool ret; + + if (!tk->symbol) + return false; + p = strchr(tk->symbol, ':'); + if (!p) + return true; + *p = '\0'; + mutex_lock(&module_mutex); + ret = !!find_module(tk->symbol); + mutex_unlock(&module_mutex); + *p = ':'; + + return ret; } static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) @@ -374,19 +388,13 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) else ret = register_kprobe(&tk->rp.kp); - if (ret == 0) + if (ret == 0) { tk->tp.flags |= TP_FLAG_REGISTERED; - else { - if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) { - pr_warn("This probe might be able to register after target module is loaded. Continue.\n"); - ret = 0; - } else if (ret == -EILSEQ) { - pr_warn("Probing address(0x%p) is not an instruction boundary.\n", - tk->rp.kp.addr); - ret = -EINVAL; - } + } else if (ret == -EILSEQ) { + pr_warn("Probing address(0x%p) is not an instruction boundary.\n", + tk->rp.kp.addr); + ret = -EINVAL; } - return ret; } @@ -449,6 +457,11 @@ static int register_trace_kprobe(struct trace_kprobe *tk) /* Register k*probe */ ret = __register_trace_kprobe(tk); + if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) { + pr_warn("This probe might be able to register after target module is loaded. Continue.\n"); + ret = 0; + } + if (ret < 0) unregister_kprobe_event(tk); else -- cgit v1.2.3 From a6682814f37124ec1e708cca8f44968445fa9dd7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 29 Aug 2018 01:18:43 +0900 Subject: tracing/kprobes: Allow kprobe-events to record module symbol Allow kprobe-events to record module symbols. Since data symbols in a non-loaded module doesn't exist, it fails to define such symbol as an argument of kprobe-event. But if the kprobe event is defined on that module, we can defer to resolve the symbol address. Note that if given symbol is not found, the event is kept unavailable. User can enable it but the event is not recorded. Link: http://lkml.kernel.org/r/153547312336.26502.11432902826345374463.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 12 ++++++++- kernel/trace/trace_probe.c | 62 +++++++++++++++++++++++++++++++++++++++------ kernel/trace/trace_probe.h | 4 ++- 3 files changed, 68 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 4727a13824f0..fec67188c4d2 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -366,7 +366,7 @@ static bool within_notrace_func(struct trace_kprobe *tk) /* Internal register function - just handle k*probes and flags */ static int __register_trace_kprobe(struct trace_kprobe *tk) { - int ret; + int i, ret; if (trace_probe_is_registered(&tk->tp)) return -EINVAL; @@ -377,6 +377,12 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) return -EINVAL; } + for (i = 0; i < tk->tp.nr_args; i++) { + ret = traceprobe_update_arg(&tk->tp.args[i]); + if (ret) + return ret; + } + /* Set/clear disabled flag according to tp->flag */ if (trace_probe_is_enabled(&tk->tp)) tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; @@ -928,6 +934,7 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, { unsigned long val; +retry: /* 1st stage: get value from context */ switch (code->op) { case FETCH_OP_REG: @@ -953,6 +960,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, val = regs_get_kernel_argument(regs, code->param); break; #endif + case FETCH_NOP_SYMBOL: /* Ignore a place holder */ + code++; + goto retry; default: return -EILSEQ; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 333cda6d2633..5b3d573b3dcf 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -251,16 +251,16 @@ parse_probe_arg(char *arg, const struct fetch_type *type, if (!(flags & TPARG_FL_KERNEL)) return -EINVAL; - ret = traceprobe_split_symbol_offset(arg + 1, &offset); - if (ret) - break; + /* Preserve symbol for updating */ + code->op = FETCH_NOP_SYMBOL; + code->data = kstrdup(arg + 1, GFP_KERNEL); + if (!code->data) + return -ENOMEM; + if (++code == end) + return -E2BIG; code->op = FETCH_OP_IMM; - code->immediate = - (unsigned long)kallsyms_lookup_name(arg + 1); - if (!code->immediate) - return -ENOENT; - code->immediate += offset; + code->immediate = 0; } /* These are fetching from memory */ if (++code == end) @@ -480,6 +480,11 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, memcpy(parg->code, tmp, sizeof(*code) * (code - tmp + 1)); fail: + if (ret) { + for (code = tmp; code < tmp + FETCH_INSN_MAX; code++) + if (code->op == FETCH_NOP_SYMBOL) + kfree(code->data); + } kfree(tmp); return ret; @@ -504,12 +509,53 @@ int traceprobe_conflict_field_name(const char *name, void traceprobe_free_probe_arg(struct probe_arg *arg) { + struct fetch_insn *code = arg->code; + + while (code && code->op != FETCH_OP_END) { + if (code->op == FETCH_NOP_SYMBOL) + kfree(code->data); + code++; + } kfree(arg->code); kfree(arg->name); kfree(arg->comm); kfree(arg->fmt); } +int traceprobe_update_arg(struct probe_arg *arg) +{ + struct fetch_insn *code = arg->code; + long offset; + char *tmp; + char c; + int ret = 0; + + while (code && code->op != FETCH_OP_END) { + if (code->op == FETCH_NOP_SYMBOL) { + if (code[1].op != FETCH_OP_IMM) + return -EINVAL; + + tmp = strpbrk("+-", code->data); + if (tmp) + c = *tmp; + ret = traceprobe_split_symbol_offset(code->data, + &offset); + if (ret) + return ret; + + code[1].immediate = + (unsigned long)kallsyms_lookup_name(code->data); + if (tmp) + *tmp = c; + if (!code[1].immediate) + return -ENOENT; + code[1].immediate += offset; + } + code++; + } + return 0; +} + /* When len=0, we just calculate the needed length */ #define LEN_OR_ZERO (len ? len - pos : 0) static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 09f62171cc23..974afc1a3e73 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -100,6 +100,7 @@ enum fetch_op { // Stage 5 (loop) op FETCH_OP_LP_ARRAY, /* Array: .param = loop count */ FETCH_OP_END, + FETCH_NOP_SYMBOL, /* Unresolved Symbol holder */ }; struct fetch_insn { @@ -116,6 +117,7 @@ struct fetch_insn { unsigned char rshift; }; unsigned long immediate; + void *data; }; }; @@ -276,7 +278,7 @@ extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size, extern int traceprobe_conflict_field_name(const char *name, struct probe_arg *args, int narg); -extern void traceprobe_update_arg(struct probe_arg *arg); +extern int traceprobe_update_arg(struct probe_arg *arg); extern void traceprobe_free_probe_arg(struct probe_arg *arg); extern int traceprobe_split_symbol_offset(char *symbol, long *offset); -- cgit v1.2.3 From bf173ca92da97863e1579a982d500da98f2e7a3f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 12 Oct 2018 12:50:22 -0400 Subject: tracing: probeevent: Fix uninitialized used of offset in parse args Dan's smatch utility found an uninitialized use of offset in a path in parse_probe_args(). Unless an offset is specifically specified for commands that allow them, it should default to zero. Link: http://lkml.kernel.org/r/20181012134246.5doqaobxunlqqs53@mwanda Fixes: 533059281ee5 ("tracing: probeevent: Introduce new argument fetching code") Reported-by: Dan Carpenter (smatch) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 5b3d573b3dcf..3ef15a6683c0 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -209,7 +209,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, { struct fetch_insn *code = *pcode; unsigned long param; - long offset; + long offset = 0; char *tmp; int ret = 0; -- cgit v1.2.3 From bcfa4b72111c9a4d483024cb1f877803b354aa11 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 15 Aug 2018 14:22:16 -0400 Subject: memremap: Convert to XArray Use the new xa_store_range function instead of the radix tree. Signed-off-by: Matthew Wilcox --- kernel/memremap.c | 76 ++++++++++++------------------------------------------- 1 file changed, 16 insertions(+), 60 deletions(-) (limited to 'kernel') diff --git a/kernel/memremap.c b/kernel/memremap.c index 5b8600d39931..e842fab9f184 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -1,47 +1,21 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright(c) 2015 Intel Corporation. All rights reserved. */ -#include #include -#include -#include #include #include -#include #include +#include +#include #include #include +#include #include +#include -static DEFINE_MUTEX(pgmap_lock); -static RADIX_TREE(pgmap_radix, GFP_KERNEL); +static DEFINE_XARRAY(pgmap_array); #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1) #define SECTION_SIZE (1UL << PA_SECTION_SHIFT) -static unsigned long order_at(struct resource *res, unsigned long pgoff) -{ - unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff; - unsigned long nr_pages, mask; - - nr_pages = PHYS_PFN(resource_size(res)); - if (nr_pages == pgoff) - return ULONG_MAX; - - /* - * What is the largest aligned power-of-2 range available from - * this resource pgoff to the end of the resource range, - * considering the alignment of the current pgoff? - */ - mask = phys_pgoff | rounddown_pow_of_two(nr_pages - pgoff); - if (!mask) - return ULONG_MAX; - - return find_first_bit(&mask, BITS_PER_LONG); -} - -#define foreach_order_pgoff(res, order, pgoff) \ - for (pgoff = 0, order = order_at((res), pgoff); order < ULONG_MAX; \ - pgoff += 1UL << order, order = order_at((res), pgoff)) - #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) vm_fault_t device_private_entry_fault(struct vm_area_struct *vma, unsigned long addr, @@ -70,18 +44,10 @@ vm_fault_t device_private_entry_fault(struct vm_area_struct *vma, EXPORT_SYMBOL(device_private_entry_fault); #endif /* CONFIG_DEVICE_PRIVATE */ -static void pgmap_radix_release(struct resource *res, unsigned long end_pgoff) +static void pgmap_array_delete(struct resource *res) { - unsigned long pgoff, order; - - mutex_lock(&pgmap_lock); - foreach_order_pgoff(res, order, pgoff) { - if (pgoff >= end_pgoff) - break; - radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff); - } - mutex_unlock(&pgmap_lock); - + xa_store_range(&pgmap_array, PHYS_PFN(res->start), PHYS_PFN(res->end), + NULL, GFP_KERNEL); synchronize_rcu(); } @@ -142,7 +108,7 @@ static void devm_memremap_pages_release(void *data) mem_hotplug_done(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); - pgmap_radix_release(res, -1); + pgmap_array_delete(res); dev_WARN_ONCE(dev, pgmap->altmap.alloc, "%s: failed to free all reserved pages\n", __func__); } @@ -175,7 +141,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) struct vmem_altmap *altmap = pgmap->altmap_valid ? &pgmap->altmap : NULL; struct resource *res = &pgmap->res; - unsigned long pfn, pgoff, order; + unsigned long pfn; pgprot_t pgprot = PAGE_KERNEL; int error, nid, is_ram; struct dev_pagemap *conflict_pgmap; @@ -216,20 +182,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) pgmap->dev = dev; - mutex_lock(&pgmap_lock); - error = 0; - - foreach_order_pgoff(res, order, pgoff) { - error = __radix_tree_insert(&pgmap_radix, - PHYS_PFN(res->start) + pgoff, order, pgmap); - if (error) { - dev_err(dev, "%s: failed: %d\n", __func__, error); - break; - } - } - mutex_unlock(&pgmap_lock); + error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start), + PHYS_PFN(res->end), pgmap, GFP_KERNEL)); if (error) - goto err_radix; + goto err_array; nid = dev_to_node(dev); if (nid < 0) @@ -279,8 +235,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) err_kasan: untrack_pfn(NULL, PHYS_PFN(align_start), align_size); err_pfn_remap: - err_radix: - pgmap_radix_release(res, pgoff); + pgmap_array_delete(res); + err_array: return ERR_PTR(error); } EXPORT_SYMBOL(devm_memremap_pages); @@ -320,7 +276,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, /* fall back to slow path lookup */ rcu_read_lock(); - pgmap = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys)); + pgmap = xa_load(&pgmap_array, PHYS_PFN(phys)); if (pgmap && !percpu_ref_tryget_live(pgmap->ref)) pgmap = NULL; rcu_read_unlock(); -- cgit v1.2.3 From 145d952a29320dea883246bcb24ba1da7ac4bb7f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 4 Oct 2018 14:04:02 +0200 Subject: sched: Factor out nr_iowait and nr_iowait_cpu The function nr_iowait_cpu() can be used directly by nr_iowait() instead of duplicating code. Call nr_iowait_cpu() from nr_iowait() Signed-off-by: Daniel Lezcano Acked-by: Peter Zijlstra (Intel) Signed-off-by: Rafael J. Wysocki --- kernel/sched/core.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fe0223121883..9245c56b8f5f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2875,6 +2875,25 @@ unsigned long long nr_context_switches(void) return sum; } +/* + * Consumers of these two interfaces, like for example the cpuidle menu + * governor, are using nonsensical data. Preferring shallow idle state selection + * for a CPU that has IO-wait which might not even end up running the task when + * it does become runnable. + */ + +unsigned long nr_iowait_cpu(int cpu) +{ + return atomic_read(&cpu_rq(cpu)->nr_iowait); +} + +void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) +{ + struct rq *rq = this_rq(); + *nr_waiters = atomic_read(&rq->nr_iowait); + *load = rq->load.weight; +} + /* * IO-wait accounting, and how its mostly bollocks (on SMP). * @@ -2910,31 +2929,11 @@ unsigned long nr_iowait(void) unsigned long i, sum = 0; for_each_possible_cpu(i) - sum += atomic_read(&cpu_rq(i)->nr_iowait); + sum += nr_iowait_cpu(i); return sum; } -/* - * Consumers of these two interfaces, like for example the cpuidle menu - * governor, are using nonsensical data. Preferring shallow idle state selection - * for a CPU that has IO-wait which might not even end up running the task when - * it does become runnable. - */ - -unsigned long nr_iowait_cpu(int cpu) -{ - struct rq *this = cpu_rq(cpu); - return atomic_read(&this->nr_iowait); -} - -void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) -{ - struct rq *rq = this_rq(); - *nr_waiters = atomic_read(&rq->nr_iowait); - *load = rq->load.weight; -} - #ifdef CONFIG_SMP /* -- cgit v1.2.3 From a7fe5190c03f8137ef08db84a58dd4daf2c4785d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 4 Oct 2018 14:04:03 +0200 Subject: cpuidle: menu: Remove get_loadavg() from the performance multiplier The function get_loadavg() returns almost always zero. To be more precise, statistically speaking for a total of 1023379 times passing in the function, the load is equal to zero 1020728 times, greater than 100, 610 times, the remaining is between 0 and 5. In 2011, the get_loadavg() was removed from the Android tree because of the above [1]. At this time, the load was: unsigned long this_cpu_load(void) { struct rq *this = this_rq(); return this->cpu_load[0]; } In 2014, the code was changed by commit 372ba8cb46b2 (cpuidle: menu: Lookup CPU runqueues less) and the load is: void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) { struct rq *rq = this_rq(); *nr_waiters = atomic_read(&rq->nr_iowait); *load = rq->load.weight; } with the same result. Both measurements show using the load in this code path does no matter anymore. Removing it. [1] https://android.googlesource.com/kernel/common/+/4dedd9f124703207895777ac6e91dacde0f7cc17 Signed-off-by: Daniel Lezcano Acked-by: Mel Gorman Acked-by: Peter Zijlstra (Intel) Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 25 ++++++------------------- include/linux/sched/stat.h | 1 - kernel/sched/core.c | 7 ------- 3 files changed, 6 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 575a68f31761..76df4f947f07 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -134,11 +134,6 @@ struct menu_device { #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) -static inline int get_loadavg(unsigned long load) -{ - return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10; -} - static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters) { int bucket = 0; @@ -172,18 +167,10 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters * to be, the higher this multiplier, and thus the higher * the barrier to go to an expensive C state. */ -static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned long load) +static inline int performance_multiplier(unsigned long nr_iowaiters) { - int mult = 1; - - /* for higher loadavg, we are more reluctant */ - - mult += 2 * get_loadavg(load); - - /* for IO wait tasks (per cpu!) we add 5x each */ - mult += 10 * nr_iowaiters; - - return mult; + /* for IO wait tasks (per cpu!) we add 10x each */ + return 1 + 10 * nr_iowaiters; } static DEFINE_PER_CPU(struct menu_device, menu_devices); @@ -301,7 +288,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int idx; unsigned int interactivity_req; unsigned int predicted_us; - unsigned long nr_iowaiters, cpu_load; + unsigned long nr_iowaiters; ktime_t delta_next; if (data->needs_update) { @@ -312,7 +299,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* determine the expected residency time, round up */ data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next)); - get_iowait_load(&nr_iowaiters, &cpu_load); + nr_iowaiters = nr_iowait_cpu(dev->cpu); data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); if (unlikely(drv->state_count <= 1 || latency_req == 0) || @@ -356,7 +343,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * Use the performance multiplier and the user-configurable * latency_req to determine the maximum exit latency. */ - interactivity_req = predicted_us / performance_multiplier(nr_iowaiters, cpu_load); + interactivity_req = predicted_us / performance_multiplier(nr_iowaiters); if (latency_req > interactivity_req) latency_req = interactivity_req; } diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index 04f1321d14c4..f30954cc059d 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -20,7 +20,6 @@ extern unsigned long nr_running(void); extern bool single_task_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); -extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); static inline int sched_info_on(void) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9245c56b8f5f..c8d5c279be14 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2887,13 +2887,6 @@ unsigned long nr_iowait_cpu(int cpu) return atomic_read(&cpu_rq(cpu)->nr_iowait); } -void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) -{ - struct rq *rq = this_rq(); - *nr_waiters = atomic_read(&rq->nr_iowait); - *load = rq->load.weight; -} - /* * IO-wait accounting, and how its mostly bollocks (on SMP). * -- cgit v1.2.3 From a1c6ca3c6de763459a6e93b644ec6518c890ba1c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 17 Oct 2018 13:23:55 +0200 Subject: kernel: hung_task.c: disable on suspend It is possible to observe hung_task complaints when system goes to suspend-to-idle state: # echo freeze > /sys/power/state PM: Syncing filesystems ... done. Freezing user space processes ... (elapsed 0.001 seconds) done. OOM killer disabled. Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done. sd 0:0:0:0: [sda] Synchronizing SCSI cache INFO: task bash:1569 blocked for more than 120 seconds. Not tainted 4.19.0-rc3_+ #687 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. bash D 0 1569 604 0x00000000 Call Trace: ? __schedule+0x1fe/0x7e0 schedule+0x28/0x80 suspend_devices_and_enter+0x4ac/0x750 pm_suspend+0x2c0/0x310 Register a PM notifier to disable the detector on suspend and re-enable back on wakeup. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Rafael J. Wysocki --- kernel/hung_task.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/hung_task.c b/kernel/hung_task.c index b9132d1269ef..cb8e3e8ac7b9 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -242,6 +243,28 @@ void reset_hung_task_detector(void) } EXPORT_SYMBOL_GPL(reset_hung_task_detector); +static bool hung_detector_suspended; + +static int hungtask_pm_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + case PM_RESTORE_PREPARE: + hung_detector_suspended = true; + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + case PM_POST_RESTORE: + hung_detector_suspended = false; + break; + default: + break; + } + return NOTIFY_OK; +} + /* * kthread which checks for tasks stuck in D state */ @@ -261,7 +284,8 @@ static int watchdog(void *dummy) interval = min_t(unsigned long, interval, timeout); t = hung_timeout_jiffies(hung_last_checked, interval); if (t <= 0) { - if (!atomic_xchg(&reset_hung_task, 0)) + if (!atomic_xchg(&reset_hung_task, 0) && + !hung_detector_suspended) check_hung_uninterruptible_tasks(timeout); hung_last_checked = jiffies; continue; @@ -275,6 +299,10 @@ static int watchdog(void *dummy) static int __init hung_task_init(void) { atomic_notifier_chain_register(&panic_notifier_list, &panic_block); + + /* Disable hung task detector on suspend */ + pm_notifier(hungtask_pm_notify, 0); + watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); return 0; -- cgit v1.2.3 From f592f804831f1cf9d1f9966f58c80f150e6829b5 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 24 Oct 2018 20:15:17 +0900 Subject: bpf: devmap: fix wrong interface selection in notifier_call The dev_map_notification() removes interface in devmap if unregistering interface's ifindex is same. But only checking ifindex is not enough because other netns can have same ifindex. so that wrong interface selection could occurred. Hence netdev pointer comparison code is added. v2: compare netdev pointer instead of using net_eq() (Daniel Borkmann) v1: Initial patch Fixes: 2ddf71e23cc2 ("net: add notifier hooks for devmap bpf map") Signed-off-by: Taehee Yoo Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- kernel/bpf/devmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 141710b82a6c..191b79948424 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -512,8 +512,7 @@ static int dev_map_notification(struct notifier_block *notifier, struct bpf_dtab_netdev *dev, *odev; dev = READ_ONCE(dtab->netdev_map[i]); - if (!dev || - dev->dev->ifindex != netdev->ifindex) + if (!dev || netdev != dev->dev) continue; odev = cmpxchg(&dtab->netdev_map[i], dev, NULL); if (dev == odev) -- cgit v1.2.3 From 4a6998aff82a20a1aece86a186d8e5263f8b2315 Mon Sep 17 00:00:00 2001 From: Martin Lau Date: Wed, 24 Oct 2018 20:42:25 +0000 Subject: bpf, btf: fix a missing check bug in btf_parse Wenwen Wang reported: In btf_parse(), the header of the user-space btf data 'btf_data' is firstly parsed and verified through btf_parse_hdr(). In btf_parse_hdr(), the header is copied from user-space 'btf_data' to kernel-space 'btf->hdr' and then verified. If no error happens during the verification process, the whole data of 'btf_data', including the header, is then copied to 'data' in btf_parse(). It is obvious that the header is copied twice here. More importantly, no check is enforced after the second copy to make sure the headers obtained in these two copies are same. Given that 'btf_data' resides in the user space, a malicious user can race to modify the header between these two copies. By doing so, the user can inject inconsistent data, which can cause undefined behavior of the kernel and introduce potential security risk. This issue is similar to the one fixed in commit 8af03d1ae2e1 ("bpf: btf: Fix a missing check bug"). To fix it, this patch copies the user 'btf_data' *before* parsing / verifying the BTF header. Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)") Signed-off-by: Martin KaFai Lau Co-developed-by: Wenwen Wang Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- kernel/bpf/btf.c | 58 ++++++++++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 33 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 378cef70341c..ee4c82667d65 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -2067,56 +2067,47 @@ static int btf_check_sec_info(struct btf_verifier_env *env, return 0; } -static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data, - u32 btf_data_size) +static int btf_parse_hdr(struct btf_verifier_env *env) { + u32 hdr_len, hdr_copy, btf_data_size; const struct btf_header *hdr; - u32 hdr_len, hdr_copy; - /* - * Minimal part of the "struct btf_header" that - * contains the hdr_len. - */ - struct btf_min_header { - u16 magic; - u8 version; - u8 flags; - u32 hdr_len; - } __user *min_hdr; struct btf *btf; int err; btf = env->btf; - min_hdr = btf_data; + btf_data_size = btf->data_size; - if (btf_data_size < sizeof(*min_hdr)) { + if (btf_data_size < + offsetof(struct btf_header, hdr_len) + sizeof(hdr->hdr_len)) { btf_verifier_log(env, "hdr_len not found"); return -EINVAL; } - if (get_user(hdr_len, &min_hdr->hdr_len)) - return -EFAULT; - + hdr = btf->data; + hdr_len = hdr->hdr_len; if (btf_data_size < hdr_len) { btf_verifier_log(env, "btf_header not found"); return -EINVAL; } - err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len); - if (err) { - if (err == -E2BIG) - btf_verifier_log(env, "Unsupported btf_header"); - return err; + /* Ensure the unsupported header fields are zero */ + if (hdr_len > sizeof(btf->hdr)) { + u8 *expected_zero = btf->data + sizeof(btf->hdr); + u8 *end = btf->data + hdr_len; + + for (; expected_zero < end; expected_zero++) { + if (*expected_zero) { + btf_verifier_log(env, "Unsupported btf_header"); + return -E2BIG; + } + } } hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr)); - if (copy_from_user(&btf->hdr, btf_data, hdr_copy)) - return -EFAULT; + memcpy(&btf->hdr, btf->data, hdr_copy); hdr = &btf->hdr; - if (hdr->hdr_len != hdr_len) - return -EINVAL; - btf_verifier_log_hdr(env, btf_data_size); if (hdr->magic != BTF_MAGIC) { @@ -2186,10 +2177,6 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, } env->btf = btf; - err = btf_parse_hdr(env, btf_data, btf_data_size); - if (err) - goto errout; - data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN); if (!data) { err = -ENOMEM; @@ -2198,13 +2185,18 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, btf->data = data; btf->data_size = btf_data_size; - btf->nohdr_data = btf->data + btf->hdr.hdr_len; if (copy_from_user(data, btf_data, btf_data_size)) { err = -EFAULT; goto errout; } + err = btf_parse_hdr(env); + if (err) + goto errout; + + btf->nohdr_data = btf->data + btf->hdr.hdr_len; + err = btf_parse_str_sec(env); if (err) goto errout; -- cgit v1.2.3 From 5d66fa7d9e9e9399ddfdc530f352dd6f7c724485 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:45 +0200 Subject: bpf: fix direct packet access for flow dissector progs Commit d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") added direct packet access for skbs in may_access_direct_pkt_data() function where this enables read and write access to the skb->data. This is buggy because without a prologue generator such as bpf_unclone_prologue() we would allow for writing into cloned skbs. Original intention might have been to only allow read access where this is not needed (similar as the flow_dissector_func_proto() indicates which enables only bpf_skb_load_bytes() as well), therefore this patch fixes it to restrict to read-only. Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Petar Penkov Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 98fa0be35370..b0cc8f2ff95f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1387,21 +1387,23 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, enum bpf_access_type t) { switch (env->prog->type) { + /* Program types only with direct read access go here! */ case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_OUT: case BPF_PROG_TYPE_LWT_SEG6LOCAL: case BPF_PROG_TYPE_SK_REUSEPORT: - /* dst_input() and dst_output() can't write for now */ + case BPF_PROG_TYPE_FLOW_DISSECTOR: if (t == BPF_WRITE) return false; /* fallthrough */ + + /* Program types with direct read + write access go here! */ case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: case BPF_PROG_TYPE_XDP: case BPF_PROG_TYPE_LWT_XMIT: case BPF_PROG_TYPE_SK_SKB: case BPF_PROG_TYPE_SK_MSG: - case BPF_PROG_TYPE_FLOW_DISSECTOR: if (meta) return meta->pkt_access; -- cgit v1.2.3 From d5563d367c2ce48ea3d675c77f7109f37311943d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:46 +0200 Subject: bpf: fix cg_skb types to hint access type in may_access_direct_pkt_data Commit b39b5f411dcf ("bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB") added direct packet access for skbs in cg_skb program types, however allowed access type was not added to the may_access_direct_pkt_data() helper. Therefore the latter always returns false. This is not directly an issue, it just means writes are unconditionally disabled (which is correct) but also reads. Latter is relevant in this function when BPF helpers may read direct packet data which is unconditionally disabled then. Fix it by properly adding BPF_PROG_TYPE_CGROUP_SKB to may_access_direct_pkt_data(). Fixes: b39b5f411dcf ("bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b0cc8f2ff95f..5fc9a658af0e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1393,6 +1393,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, case BPF_PROG_TYPE_LWT_SEG6LOCAL: case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: + case BPF_PROG_TYPE_CGROUP_SKB: if (t == BPF_WRITE) return false; /* fallthrough */ -- cgit v1.2.3 From 80b0d86a176cab6201719b8dfd806902b0c6e046 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:47 +0200 Subject: bpf: fix direct packet write into pop/peek helpers Commit f1a2e44a3aec ("bpf: add queue and stack maps") probably just copy-pasted .pkt_access for bpf_map_{pop,peek}_elem() helpers, but this is buggy in this context since it would allow writes into cloned skbs which is invalid. Therefore, disable .pkt_access for the two. Fixes: f1a2e44a3aec ("bpf: add queue and stack maps") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Mauricio Vasquez B Acked-by: Mauricio Vasquez B Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index ab0d5e3f9892..a74972b07e74 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -99,7 +99,6 @@ BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) const struct bpf_func_proto bpf_map_pop_elem_proto = { .func = bpf_map_pop_elem, .gpl_only = false, - .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, @@ -113,7 +112,6 @@ BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) const struct bpf_func_proto bpf_map_peek_elem_proto = { .func = bpf_map_pop_elem, .gpl_only = false, - .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, -- cgit v1.2.3 From d3f66e4116aff8dd0d5bd4067295b9ddb5e2c29c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:48 +0200 Subject: bpf: fix leaking uninitialized memory on pop/peek helpers Commit f1a2e44a3aec ("bpf: add queue and stack maps") added helpers with ARG_PTR_TO_UNINIT_MAP_VALUE. Meaning, the helper is supposed to fill the map value buffer with data instead of reading from it like in other helpers such as map update. However, given the buffer is allowed to be uninitialized (since we fill it in the helper anyway), it also means that the helper is obliged to wipe the memory in case of an error in order to not allow for leaking uninitialized memory. Given pop/peek is both handled inside __{stack,queue}_map_get(), lets wipe it there on error case, that is, empty stack/queue. Fixes: f1a2e44a3aec ("bpf: add queue and stack maps") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Mauricio Vasquez B Acked-by: Mauricio Vasquez B Signed-off-by: Alexei Starovoitov --- kernel/bpf/queue_stack_maps.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 12a93fb37449..8bbd72d3a121 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -122,6 +122,7 @@ static int __queue_map_get(struct bpf_map *map, void *value, bool delete) raw_spin_lock_irqsave(&qs->lock, flags); if (queue_stack_map_is_empty(qs)) { + memset(value, 0, qs->map.value_size); err = -ENOENT; goto out; } @@ -151,6 +152,7 @@ static int __stack_map_get(struct bpf_map *map, void *value, bool delete) raw_spin_lock_irqsave(&qs->lock, flags); if (queue_stack_map_is_empty(qs)) { + memset(value, 0, qs->map.value_size); err = -ENOENT; goto out; } -- cgit v1.2.3 From b09928b976280d64060d7bee146d7df5c5a29bef Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:49 +0200 Subject: bpf: make direct packet write unclone more robust Given this seems to be quite fragile and can easily slip through the cracks, lets make direct packet write more robust by requiring that future program types which allow for such write must provide a prologue callback. In case of XDP and sk_msg it's noop, thus add a generic noop handler there. The latter starts out with NULL data/data_end unconditionally when sg pages are shared. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 +++++- net/core/filter.c | 11 +++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5fc9a658af0e..171a2c88e77d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5709,7 +5709,11 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) bool is_narrower_load; u32 target_size; - if (ops->gen_prologue) { + if (ops->gen_prologue || env->seen_direct_write) { + if (!ops->gen_prologue) { + verbose(env, "bpf verifier is misconfigured\n"); + return -EINVAL; + } cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, env->prog); if (cnt >= ARRAY_SIZE(insn_buf)) { diff --git a/net/core/filter.c b/net/core/filter.c index 3fdddfa9a0fd..cd648d09a8e5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5644,6 +5644,15 @@ static bool sock_filter_is_valid_access(int off, int size, prog->expected_attach_type); } +static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + /* Neither direct read nor direct write requires any preliminary + * action. + */ + return 0; +} + static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog, int drop_verdict) { @@ -7210,6 +7219,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, }; const struct bpf_prog_ops xdp_prog_ops = { @@ -7308,6 +7318,7 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = { .get_func_proto = sk_msg_func_proto, .is_valid_access = sk_msg_is_valid_access, .convert_ctx_access = sk_msg_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, }; const struct bpf_prog_ops sk_msg_prog_ops = { -- cgit v1.2.3 From ede95a63b5e84ddeea6b0c473b36ab8bfd8c6ce3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Oct 2018 01:11:04 +0200 Subject: bpf: add bpf_jit_limit knob to restrict unpriv allocations Rick reported that the BPF JIT could potentially fill the entire module space with BPF programs from unprivileged users which would prevent later attempts to load normal kernel modules or privileged BPF programs, for example. If JIT was enabled but unsuccessful to generate the image, then before commit 290af86629b2 ("bpf: introduce BPF_JIT_ALWAYS_ON config") we would always fall back to the BPF interpreter. Nowadays in the case where the CONFIG_BPF_JIT_ALWAYS_ON could be set, then the load will abort with a failure since the BPF interpreter was compiled out. Add a global limit and enforce it for unprivileged users such that in case of BPF interpreter compiled out we fail once the limit has been reached or we fall back to BPF interpreter earlier w/o using module mem if latter was compiled in. In a next step, fair share among unprivileged users can be resolved in particular for the case where we would fail hard once limit is reached. Fixes: 290af86629b2 ("bpf: introduce BPF_JIT_ALWAYS_ON config") Fixes: 0a14842f5a3c ("net: filter: Just In Time compiler for x86-64") Co-Developed-by: Rick Edgecombe Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Eric Dumazet Cc: Jann Horn Cc: Kees Cook Cc: LKML Signed-off-by: Alexei Starovoitov --- Documentation/sysctl/net.txt | 8 ++++++++ include/linux/filter.h | 1 + kernel/bpf/core.c | 49 +++++++++++++++++++++++++++++++++++++++++--- net/core/sysctl_net_core.c | 10 +++++++-- 4 files changed, 63 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 9ecde517728c..2793d4eac55f 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -92,6 +92,14 @@ Values : 0 - disable JIT kallsyms export (default value) 1 - enable JIT kallsyms export for privileged users only +bpf_jit_limit +------------- + +This enforces a global limit for memory allocations to the BPF JIT +compiler in order to reject unprivileged JIT requests once it has +been surpassed. bpf_jit_limit contains the value of the global limit +in bytes. + dev_weight -------------- diff --git a/include/linux/filter.h b/include/linux/filter.h index 91b4c934f02e..de629b706d1d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -854,6 +854,7 @@ bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; +extern int bpf_jit_limit; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7c7eeea8cffc..6377225b2082 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -365,10 +365,13 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) } #ifdef CONFIG_BPF_JIT +# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000) + /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); int bpf_jit_harden __read_mostly; int bpf_jit_kallsyms __read_mostly; +int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT; static __always_inline void bpf_get_prog_addr_region(const struct bpf_prog *prog, @@ -577,27 +580,64 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, return ret; } +static atomic_long_t bpf_jit_current; + +#if defined(MODULES_VADDR) +static int __init bpf_jit_charge_init(void) +{ + /* Only used as heuristic here to derive limit. */ + bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2, + PAGE_SIZE), INT_MAX); + return 0; +} +pure_initcall(bpf_jit_charge_init); +#endif + +static int bpf_jit_charge_modmem(u32 pages) +{ + if (atomic_long_add_return(pages, &bpf_jit_current) > + (bpf_jit_limit >> PAGE_SHIFT)) { + if (!capable(CAP_SYS_ADMIN)) { + atomic_long_sub(pages, &bpf_jit_current); + return -EPERM; + } + } + + return 0; +} + +static void bpf_jit_uncharge_modmem(u32 pages) +{ + atomic_long_sub(pages, &bpf_jit_current); +} + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, bpf_jit_fill_hole_t bpf_fill_ill_insns) { struct bpf_binary_header *hdr; - unsigned int size, hole, start; + u32 size, hole, start, pages; /* Most of BPF filters are really small, but if some of them * fill a page, allow at least 128 extra bytes to insert a * random section of illegal instructions. */ size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE); + pages = size / PAGE_SIZE; + + if (bpf_jit_charge_modmem(pages)) + return NULL; hdr = module_alloc(size); - if (hdr == NULL) + if (!hdr) { + bpf_jit_uncharge_modmem(pages); return NULL; + } /* Fill space with illegal/arch-dep instructions. */ bpf_fill_ill_insns(hdr, size); - hdr->pages = size / PAGE_SIZE; + hdr->pages = pages; hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); start = (get_random_int() % hole) & ~(alignment - 1); @@ -610,7 +650,10 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, void bpf_jit_binary_free(struct bpf_binary_header *hdr) { + u32 pages = hdr->pages; + module_memfree(hdr); + bpf_jit_uncharge_modmem(pages); } /* This symbol is only overridden by archs that have different diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b1a2c5e38530..37b4667128a3 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -279,7 +279,6 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, return ret; } -# ifdef CONFIG_HAVE_EBPF_JIT static int proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -290,7 +289,6 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } -# endif #endif static struct ctl_table net_core_table[] = { @@ -397,6 +395,14 @@ static struct ctl_table net_core_table[] = { .extra2 = &one, }, # endif + { + .procname = "bpf_jit_limit", + .data = &bpf_jit_limit, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax_bpf_restricted, + .extra1 = &one, + }, #endif { .procname = "netdev_tstamp_prequeue", -- cgit v1.2.3 From da387e5c930f43d9f3b011a6fbb33bdf43d9714c Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 17 Oct 2018 09:51:43 +0300 Subject: tracing: Export trace_dump_stack to modules There is no reason for this function to be unexprted and it's a useful debugging aid. Link: http://lkml.kernel.org/r/1539759103-5923-1-git-send-email-nborisov@suse.com Signed-off-by: Nikolay Borisov Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ec5b21778806..ff1c4b20cd0a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2727,6 +2727,7 @@ void trace_dump_stack(int skip) __ftrace_trace_stack(global_trace.trace_buffer.buffer, flags, skip, preempt_count(), NULL); } +EXPORT_SYMBOL_GPL(trace_dump_stack); static DEFINE_PER_CPU(int, user_stack_count); -- cgit v1.2.3 From a2acce536921bd793bae13fa344fcea157638e72 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 15 Oct 2018 23:14:28 -0400 Subject: tracing: Have stack tracer trace full stack The stack tracer traces every function call checking the current stack (in non interrupt context), looking for the deepest stack, and saving it when it finds a new max depth. The problem is that it calls save_stack_trace(), and with the new ORC unwinder, it can skip too much. As it looks at the ip of the function call in the backtrace to find where it should start, it doesn't need to skip anything. The stack trace selftest would fail when the kernel was complied with the ORC UNDWINDER enabled. Without skipping functions when doing the stack trace, it now passes again. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_stack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 4237eba4ef20..2b0d1ee3241c 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -111,7 +111,7 @@ check_stack(unsigned long ip, unsigned long *stack) stack_trace_max_size = this_size; stack_trace_max.nr_entries = 0; - stack_trace_max.skip = 3; + stack_trace_max.skip = 0; save_stack_trace(&stack_trace_max); -- cgit v1.2.3 From 18858511fd8a877303cc34c06efa461b26a0e070 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 22 Oct 2018 00:08:20 +0900 Subject: tracing: Return -ENOENT if there is no target synthetic event Return -ENOENT error if there is no target synthetic event. This notices an operation failure to user as below; # echo 'wakeup_latency u64 lat; pid_t pid;' > synthetic_events # echo '!wakeup' >> synthetic_events sh: write error: No such file or directory Link: http://lkml.kernel.org/r/154013449986.25576.9487131386597290172.stgit@devbox Acked-by: Tom Zanussi Tested-by: Tom Zanussi Cc: Shuah Khan Cc: Rajvi Jingar Cc: stable@vger.kernel.org Fixes: 4b147936fa50 ('tracing: Add support for 'synthetic' events') Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index d239004aaf29..eb908ef2ecec 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1063,8 +1063,10 @@ static int create_synth_event(int argc, char **argv) event = NULL; ret = -EEXIST; goto out; - } else if (delete_event) + } else if (delete_event) { + ret = -ENOENT; goto out; + } if (argc < 2) { ret = -EINVAL; -- cgit v1.2.3 From ea6f650465c61c70b4c96648ed2cb8a0c55db337 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Tue, 30 Oct 2018 15:04:51 -0700 Subject: kernel/fail_function.c: remove meaningless null pointer check before debugfs_remove_recursive debugfs_remove_recursive() has taken the null pointer into account. just remove the null check before debugfs_remove_recursive(). Link: http://lkml.kernel.org/r/1537494404-16473-1-git-send-email-zhongjiang@huawei.com Signed-off-by: zhong jiang Acked-by: Masami Hiramatsu Acked-by: Kees Cook Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fail_function.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/fail_function.c b/kernel/fail_function.c index bc80a4e268c0..17f75b545f66 100644 --- a/kernel/fail_function.c +++ b/kernel/fail_function.c @@ -173,8 +173,7 @@ static void fei_debugfs_remove_attr(struct fei_attr *attr) struct dentry *dir; dir = debugfs_lookup(attr->kp.symbol_name, fei_debugfs_dir); - if (dir) - debugfs_remove_recursive(dir); + debugfs_remove_recursive(dir); } static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs) -- cgit v1.2.3 From 2e58f57d859fbe0ce79e052d7a63bba32d5786e8 Mon Sep 17 00:00:00 2001 From: Weikang Shi Date: Tue, 30 Oct 2018 15:07:05 -0700 Subject: kernel/signal.c: fix a comment error Because get_signal_to_deliver() was renamed to get_signal() the comment should be fixed. Link: http://lkml.kernel.org/r/1539179128-45709-1-git-send-email-swkhack@gmail.com Signed-off-by: Weikang Shi Reported-by: Christian Brauner Cc: Eric W. Biederman Cc: Oleg Nesterov Cc: Anna-Maria Gleixner Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/signal.c | 4 ++-- kernel/signal.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 9a960829a01d..99099f73b207 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -344,10 +344,10 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) get_signal(&ksig); /* - * get_signal_to_deliver() may have run a debugger (via notify_parent()) + * get_signal() may have run a debugger (via notify_parent()) * and the debugger may have modified the state (e.g., to arrange for an * inferior call), thus it's important to check for restarting _after_ - * get_signal_to_deliver(). + * get_signal(). */ if ((long) scr->pt.r10 != -1) /* diff --git a/kernel/signal.c b/kernel/signal.c index 17565240b1c6..9a32bc2088c9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -892,7 +892,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force) /* * The first thread which returns from do_signal_stop() * will take ->siglock, notice SIGNAL_CLD_MASK, and - * notify its parent. See get_signal_to_deliver(). + * notify its parent. See get_signal(). */ signal_set_stop_flags(signal, why | SIGNAL_STOP_CONTINUED); signal->group_stop_count = 0; -- cgit v1.2.3 From 95c4fb78fb23081472465ca20d5d31c4b780ed82 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 30 Oct 2018 15:07:13 -0700 Subject: kernel/panic.c: do not append newline to the stack protector panic string ... because panic() itself already does this. Otherwise you have line-broken trailer: [ 1.836965] ---[ end Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: pgd_alloc+0x29e/0x2a0 [ 1.836965] ]--- Link: http://lkml.kernel.org/r/20181008202901.7894-1-bp@alien8.de Signed-off-by: Borislav Petkov Acked-by: Kees Cook Cc: Masahiro Yamada Cc: "Steven Rostedt (VMware)" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/panic.c b/kernel/panic.c index 8b2e002d52eb..837a94b7024d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -631,7 +631,7 @@ device_initcall(register_warn_debugfs); */ __visible void __stack_chk_fail(void) { - panic("stack-protector: Kernel stack is corrupted in: %pB\n", + panic("stack-protector: Kernel stack is corrupted in: %pB", __builtin_return_address(0)); } EXPORT_SYMBOL(__stack_chk_fail); -- cgit v1.2.3 From b49dec1cf8ff1e0b204dd2c30b95a92d75591146 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 30 Oct 2018 15:07:17 -0700 Subject: kernel/panic.c: filter out a potential trailing newline If a call to panic() terminates the string with a \n , the result puts the closing brace ']---' on a newline because panic() itself adds \n too. Now, if one goes and removes the newline chars from all panic() invocations - and the stats right now look like this: ~300 calls with a \n ~500 calls without a \n one is destined to a neverending game of whack-a-mole because the usual thing to do is add a newline at the end of a string a function is supposed to print. Therefore, simply zap any \n at the end of the panic string to avoid touching so many places in the kernel. Link: http://lkml.kernel.org/r/20181009205019.2786-1-bp@alien8.de Signed-off-by: Borislav Petkov Acked-by: Kees Cook Reviewed-by: Steven Rostedt (VMware) Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/panic.c b/kernel/panic.c index 837a94b7024d..f6d549a29a5c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -136,7 +136,7 @@ void panic(const char *fmt, ...) { static char buf[1024]; va_list args; - long i, i_next = 0; + long i, i_next = 0, len; int state = 0; int old_cpu, this_cpu; bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; @@ -173,8 +173,12 @@ void panic(const char *fmt, ...) console_verbose(); bust_spinlocks(1); va_start(args, fmt); - vsnprintf(buf, sizeof(buf), fmt, args); + len = vscnprintf(buf, sizeof(buf), fmt, args); va_end(args); + + if (len && buf[len - 1] == '\n') + buf[len - 1] = '\0'; + pr_emerg("Kernel panic - not syncing: %s\n", buf); #ifdef CONFIG_DEBUG_BUGVERBOSE /* -- cgit v1.2.3 From 6a32c2469c3fbfee8f25bcd20af647326650a6cf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 30 Oct 2018 15:07:32 -0700 Subject: kbuild: fix kernel/bounds.c 'W=1' warning Building any configuration with 'make W=1' produces a warning: kernel/bounds.c:16:6: warning: no previous prototype for 'foo' [-Wmissing-prototypes] When also passing -Werror, this prevents us from building any other files. Nobody ever calls the function, but we can't make it 'static' either since we want the compiler output. Calling it 'main' instead however avoids the warning, because gcc does not insist on having a declaration for main. Link: http://lkml.kernel.org/r/20181005083313.2088252-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Reported-by: Kieran Bingham Reviewed-by: Kieran Bingham Cc: David Laight Cc: Masahiro Yamada Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/bounds.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bounds.c b/kernel/bounds.c index c373e887c066..9795d75b09b2 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -13,7 +13,7 @@ #include #include -void foo(void) +int main(void) { /* The enum constants to put into include/generated/bounds.h */ DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); @@ -23,4 +23,6 @@ void foo(void) #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); /* End of constants */ + + return 0; } -- cgit v1.2.3 From eb31d559f1e8390195372cd51cfb198da8bc84b9 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:08:04 -0700 Subject: memblock: remove _virt from APIs returning virtual address The conversion is done using sed -i 's@memblock_virt_alloc@memblock_alloc@g' \ $(git grep -l memblock_virt_alloc) Link: http://lkml.kernel.org/r/1536927045-23536-8-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/setup.c | 4 ++-- arch/arm/mach-omap2/omap_hwmod.c | 6 ++--- arch/arm64/mm/kasan_init.c | 2 +- arch/arm64/mm/numa.c | 2 +- arch/mips/kernel/setup.c | 2 +- arch/powerpc/kernel/pci_32.c | 2 +- arch/powerpc/lib/alloc.c | 2 +- arch/powerpc/mm/mmu_context_nohash.c | 6 ++--- arch/powerpc/platforms/powermac/nvram.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 6 ++--- arch/powerpc/platforms/ps3/setup.c | 2 +- arch/powerpc/sysdev/msi_bitmap.c | 2 +- arch/s390/kernel/setup.c | 8 +++---- arch/s390/kernel/smp.c | 2 +- arch/s390/kernel/topology.c | 4 ++-- arch/s390/numa/mode_emu.c | 2 +- arch/s390/numa/toptree.c | 2 +- arch/x86/mm/kasan_init_64.c | 4 ++-- arch/xtensa/mm/kasan_init.c | 2 +- drivers/clk/ti/clk.c | 2 +- drivers/firmware/memmap.c | 2 +- drivers/of/fdt.c | 2 +- drivers/of/unittest.c | 2 +- include/linux/bootmem.h | 38 +++++++++++++++---------------- init/main.c | 6 ++--- kernel/dma/swiotlb.c | 6 ++--- kernel/power/snapshot.c | 2 +- kernel/printk/printk.c | 4 ++-- lib/cpumask.c | 2 +- mm/hugetlb.c | 2 +- mm/kasan/kasan_init.c | 2 +- mm/memblock.c | 26 ++++++++++----------- mm/page_alloc.c | 8 +++---- mm/page_ext.c | 2 +- mm/percpu.c | 28 +++++++++++------------ mm/sparse-vmemmap.c | 2 +- mm/sparse.c | 12 +++++----- 37 files changed, 105 insertions(+), 105 deletions(-) (limited to 'kernel') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 4c249cb261f3..39e6090d23ac 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -857,7 +857,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) */ boot_alias_start = phys_to_idmap(start); if (arm_has_idmap_alias() && boot_alias_start != IDMAP_INVALID_ADDR) { - res = memblock_virt_alloc(sizeof(*res), 0); + res = memblock_alloc(sizeof(*res), 0); res->name = "System RAM (boot alias)"; res->start = boot_alias_start; res->end = phys_to_idmap(end); @@ -865,7 +865,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) request_resource(&iomem_resource, res); } - res = memblock_virt_alloc(sizeof(*res), 0); + res = memblock_alloc(sizeof(*res), 0); res->name = "System RAM"; res->start = start; res->end = end; diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 56a1fe90d394..1f9b34a7eccd 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -726,7 +726,7 @@ static int __init _setup_clkctrl_provider(struct device_node *np) u64 size; int i; - provider = memblock_virt_alloc(sizeof(*provider), 0); + provider = memblock_alloc(sizeof(*provider), 0); if (!provider) return -ENOMEM; @@ -736,12 +736,12 @@ static int __init _setup_clkctrl_provider(struct device_node *np) of_property_count_elems_of_size(np, "reg", sizeof(u32)) / 2; provider->addr = - memblock_virt_alloc(sizeof(void *) * provider->num_addrs, 0); + memblock_alloc(sizeof(void *) * provider->num_addrs, 0); if (!provider->addr) return -ENOMEM; provider->size = - memblock_virt_alloc(sizeof(u32) * provider->num_addrs, 0); + memblock_alloc(sizeof(u32) * provider->num_addrs, 0); if (!provider->size) return -ENOMEM; diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index fccb1a6f8c6f..6a65a2912d36 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -38,7 +38,7 @@ static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); static phys_addr_t __init kasan_alloc_zeroed_page(int node) { - void *p = memblock_virt_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, + void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node); return __pa(p); diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index c7fb34efd23e..0bff116c07a8 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -168,7 +168,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, { int nid = early_cpu_to_node(cpu); - return memblock_virt_alloc_try_nid(size, align, + return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); } diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 01a5ff4c41ff..0c997645e8f0 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -859,7 +859,7 @@ static void __init arch_mem_init(char **cmdline_p) * Prevent memblock from allocating high memory. * This cannot be done before max_low_pfn is detected, so up * to this point is possible to only reserve physical memory - * with memblock_reserve; memblock_virt_alloc* can be used + * with memblock_reserve; memblock_alloc* can be used * only after this point */ memblock_set_current_limit(PFN_PHYS(max_low_pfn)); diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 4da8ed576229..d39ec3a4550a 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -203,7 +203,7 @@ pci_create_OF_bus_map(void) struct property* of_prop; struct device_node *dn; - of_prop = memblock_virt_alloc(sizeof(struct property) + 256, 0); + of_prop = memblock_alloc(sizeof(struct property) + 256, 0); dn = of_find_node_by_path("/"); if (dn) { memset(of_prop, -1, sizeof(struct property) + 256); diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index 06796dec01ea..bf87d6e13369 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -14,7 +14,7 @@ void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) if (slab_is_available()) p = kzalloc(size, mask); else { - p = memblock_virt_alloc(size, 0); + p = memblock_alloc(size, 0); } return p; } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 4d80239ef83c..954f1986af4d 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -461,10 +461,10 @@ void __init mmu_context_init(void) /* * Allocate the maps used by context management */ - context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0); - context_mm = memblock_virt_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 0); + context_map = memblock_alloc(CTX_MAP_SIZE, 0); + context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 0); #ifdef CONFIG_SMP - stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0); + stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, 0); cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, "powerpc/mmu/ctx:prepare", diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index 60b03a1703d1..f45b369177a4 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -513,7 +513,7 @@ static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr) printk(KERN_ERR "nvram: no address\n"); return -EINVAL; } - nvram_image = memblock_virt_alloc(NVRAM_SIZE, 0); + nvram_image = memblock_alloc(NVRAM_SIZE, 0); nvram_data = ioremap(addr, NVRAM_SIZE*2); nvram_naddrs = 1; /* Make sure we get the correct case */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index cde710297a4e..23a67b545b70 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3770,7 +3770,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = memblock_virt_alloc(sizeof(*phb), 0); + phb = memblock_alloc(sizeof(*phb), 0); /* Allocate PCI controller */ phb->hose = hose = pcibios_alloc_controller(np); @@ -3816,7 +3816,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, else phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; - phb->diag_data = memblock_virt_alloc(phb->diag_data_size, 0); + phb->diag_data = memblock_alloc(phb->diag_data_size, 0); /* Parse 32-bit and IO ranges (if any) */ pci_process_bridge_OF_ranges(hose, np, !hose->global_number); @@ -3875,7 +3875,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); - aux = memblock_virt_alloc(size, 0); + aux = memblock_alloc(size, 0); phb->ioda.pe_alloc = aux; phb->ioda.m64_segmap = aux + m64map_off; phb->ioda.m32_segmap = aux + m32map_off; diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 77a37520068d..12519857a33c 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -126,7 +126,7 @@ static void __init prealloc(struct ps3_prealloc *p) if (!p->size) return; - p->address = memblock_virt_alloc(p->size, p->align); + p->address = memblock_alloc(p->size, p->align); printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size, p->address); diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index e64a411d1a00..349a9ff6ca5b 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -128,7 +128,7 @@ int __ref msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count, if (bmp->bitmap_from_slab) bmp->bitmap = kzalloc(size, GFP_KERNEL); else { - bmp->bitmap = memblock_virt_alloc(size, 0); + bmp->bitmap = memblock_alloc(size, 0); /* the bitmap won't be freed from memblock allocator */ kmemleak_not_leak(bmp->bitmap); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 204ccfa54bf3..781c1053a773 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -378,7 +378,7 @@ static void __init setup_lowcore(void) * Setup lowcore for boot cpu */ BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE); - lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc)); + lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc)); lc->restart_psw.mask = PSW_KERNEL_BITS; lc->restart_psw.addr = (unsigned long) restart_int_handler; lc->external_new_psw.mask = PSW_KERNEL_BITS | @@ -422,7 +422,7 @@ static void __init setup_lowcore(void) * Allocate the global restart stack which is the same for * all CPUs in cast *one* of them does a PSW restart. */ - restart_stack = memblock_virt_alloc(THREAD_SIZE, THREAD_SIZE); + restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE); restart_stack += STACK_INIT_OFFSET; /* @@ -488,7 +488,7 @@ static void __init setup_resources(void) bss_resource.end = (unsigned long) __bss_stop - 1; for_each_memblock(memory, reg) { - res = memblock_virt_alloc(sizeof(*res), 8); + res = memblock_alloc(sizeof(*res), 8); res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->name = "System RAM"; @@ -502,7 +502,7 @@ static void __init setup_resources(void) std_res->start > res->end) continue; if (std_res->end > res->end) { - sub_res = memblock_virt_alloc(sizeof(*sub_res), 8); + sub_res = memblock_alloc(sizeof(*sub_res), 8); *sub_res = *std_res; sub_res->end = res->end; std_res->start = res->end + 1; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 1b3188f57b58..44f9a7d6450b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -761,7 +761,7 @@ void __init smp_detect_cpus(void) u16 address; /* Get CPU information */ - info = memblock_virt_alloc(sizeof(*info), 8); + info = memblock_alloc(sizeof(*info), 8); smp_get_core_info(info, 1); /* Find boot CPU type */ if (sclp.has_core_type) { diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index e8184a15578a..799a91882a76 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -519,7 +519,7 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info, nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; nr_masks = max(nr_masks, 1); for (i = 0; i < nr_masks; i++) { - mask->next = memblock_virt_alloc(sizeof(*mask->next), 8); + mask->next = memblock_alloc(sizeof(*mask->next), 8); mask = mask->next; } } @@ -537,7 +537,7 @@ void __init topology_init_early(void) } if (!MACHINE_HAS_TOPOLOGY) goto out; - tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE); + tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE); info = tl_info; store_topology(info); pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c index 83b222c57609..5a381fc8e958 100644 --- a/arch/s390/numa/mode_emu.c +++ b/arch/s390/numa/mode_emu.c @@ -313,7 +313,7 @@ static void __ref create_core_to_node_map(void) { int i; - emu_cores = memblock_virt_alloc(sizeof(*emu_cores), 8); + emu_cores = memblock_alloc(sizeof(*emu_cores), 8); for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++) emu_cores->to_node_id[i] = NODE_ID_FREE; } diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c index 21d1e8a1546d..7f61cc3fd4d1 100644 --- a/arch/s390/numa/toptree.c +++ b/arch/s390/numa/toptree.c @@ -34,7 +34,7 @@ struct toptree __ref *toptree_alloc(int level, int id) if (slab_is_available()) res = kzalloc(sizeof(*res), GFP_KERNEL); else - res = memblock_virt_alloc(sizeof(*res), 8); + res = memblock_alloc(sizeof(*res), 8); if (!res) return res; diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index e3e77527f8df..77b857cb036f 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -28,10 +28,10 @@ static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); static __init void *early_alloc(size_t size, int nid, bool panic) { if (panic) - return memblock_virt_alloc_try_nid(size, size, + return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); else - return memblock_virt_alloc_try_nid_nopanic(size, size, + return memblock_alloc_try_nid_nopanic(size, size, __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); } diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c index 6b532b6bd785..1a30a258ccd0 100644 --- a/arch/xtensa/mm/kasan_init.c +++ b/arch/xtensa/mm/kasan_init.c @@ -43,7 +43,7 @@ static void __init populate(void *start, void *end) unsigned long vaddr = (unsigned long)start; pgd_t *pgd = pgd_offset_k(vaddr); pmd_t *pmd = pmd_offset(pgd, vaddr); - pte_t *pte = memblock_virt_alloc(n_pages * sizeof(pte_t), PAGE_SIZE); + pte_t *pte = memblock_alloc(n_pages * sizeof(pte_t), PAGE_SIZE); pr_debug("%s: %p - %p\n", __func__, start, end); diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index 7d22e1af2247..5c54d3734daf 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -342,7 +342,7 @@ void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem) { struct clk_iomap *io; - io = memblock_virt_alloc(sizeof(*io), 0); + io = memblock_alloc(sizeof(*io), 0); io->mem = mem; diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 5de3ed29282c..03cead6d5f97 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -333,7 +333,7 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; - entry = memblock_virt_alloc(sizeof(struct firmware_map_entry), 0); + entry = memblock_alloc(sizeof(struct firmware_map_entry), 0); if (WARN_ON(!entry)) return -ENOMEM; diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 4f915cea6f75..ffe62a7ae19b 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1179,7 +1179,7 @@ int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base, static void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) { - return memblock_virt_alloc(size, align); + return memblock_alloc(size, align); } bool __init early_init_dt_verify(void *params) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index a3a6866765f2..01e23b85e798 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -2192,7 +2192,7 @@ static struct device_node *overlay_base_root; static void * __init dt_alloc_memory(u64 size, u64 align) { - return memblock_virt_alloc(size, align); + return memblock_alloc(size, align); } /* diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index b74bafd110b9..7d91f0f5ee44 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -95,78 +95,78 @@ extern void *__alloc_bootmem_low(unsigned long size, #define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0) /* FIXME: Move to memblock.h at a point where we remove nobootmem.c */ -void *memblock_virt_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, +void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid); -void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size, +void *memblock_alloc_try_nid_nopanic(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid); -void *memblock_virt_alloc_try_nid(phys_addr_t size, phys_addr_t align, +void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid); void __memblock_free_early(phys_addr_t base, phys_addr_t size); void __memblock_free_late(phys_addr_t base, phys_addr_t size); -static inline void * __init memblock_virt_alloc( +static inline void * __init memblock_alloc( phys_addr_t size, phys_addr_t align) { - return memblock_virt_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT, + return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_virt_alloc_raw( +static inline void * __init memblock_alloc_raw( phys_addr_t size, phys_addr_t align) { - return memblock_virt_alloc_try_nid_raw(size, align, BOOTMEM_LOW_LIMIT, + return memblock_alloc_try_nid_raw(size, align, BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_virt_alloc_nopanic( +static inline void * __init memblock_alloc_nopanic( phys_addr_t size, phys_addr_t align) { - return memblock_virt_alloc_try_nid_nopanic(size, align, + return memblock_alloc_try_nid_nopanic(size, align, BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_virt_alloc_low( +static inline void * __init memblock_alloc_low( phys_addr_t size, phys_addr_t align) { - return memblock_virt_alloc_try_nid(size, align, + return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT, ARCH_LOW_ADDRESS_LIMIT, NUMA_NO_NODE); } -static inline void * __init memblock_virt_alloc_low_nopanic( +static inline void * __init memblock_alloc_low_nopanic( phys_addr_t size, phys_addr_t align) { - return memblock_virt_alloc_try_nid_nopanic(size, align, + return memblock_alloc_try_nid_nopanic(size, align, BOOTMEM_LOW_LIMIT, ARCH_LOW_ADDRESS_LIMIT, NUMA_NO_NODE); } -static inline void * __init memblock_virt_alloc_from_nopanic( +static inline void * __init memblock_alloc_from_nopanic( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) { - return memblock_virt_alloc_try_nid_nopanic(size, align, min_addr, + return memblock_alloc_try_nid_nopanic(size, align, min_addr, BOOTMEM_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_virt_alloc_node( +static inline void * __init memblock_alloc_node( phys_addr_t size, int nid) { - return memblock_virt_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT, + return memblock_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE, nid); } -static inline void * __init memblock_virt_alloc_node_nopanic( +static inline void * __init memblock_alloc_node_nopanic( phys_addr_t size, int nid) { - return memblock_virt_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT, + return memblock_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE, nid); } diff --git a/init/main.c b/init/main.c index 1c3f90264280..86b59cf3bec7 100644 --- a/init/main.c +++ b/init/main.c @@ -375,10 +375,10 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { } static void __init setup_command_line(char *command_line) { saved_command_line = - memblock_virt_alloc(strlen(boot_command_line) + 1, 0); + memblock_alloc(strlen(boot_command_line) + 1, 0); initcall_command_line = - memblock_virt_alloc(strlen(boot_command_line) + 1, 0); - static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0); + memblock_alloc(strlen(boot_command_line) + 1, 0); + static_command_line = memblock_alloc(strlen(command_line) + 1, 0); strcpy(saved_command_line, boot_command_line); strcpy(static_command_line, command_line); } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index ebecaf255ea2..801da67e957b 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -204,10 +204,10 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE * between io_tlb_start and io_tlb_end. */ - io_tlb_list = memblock_virt_alloc( + io_tlb_list = memblock_alloc( PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), PAGE_SIZE); - io_tlb_orig_addr = memblock_virt_alloc( + io_tlb_orig_addr = memblock_alloc( PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), PAGE_SIZE); for (i = 0; i < io_tlb_nslabs; i++) { @@ -242,7 +242,7 @@ swiotlb_init(int verbose) bytes = io_tlb_nslabs << IO_TLB_SHIFT; /* Get IO TLB memory from the low pages */ - vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); + vstart = memblock_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) return; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 3d37c279c090..34116a6097be 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -963,7 +963,7 @@ void __init __register_nosave_region(unsigned long start_pfn, BUG_ON(!region); } else { /* This allocation cannot fail */ - region = memblock_virt_alloc(sizeof(struct nosave_region), 0); + region = memblock_alloc(sizeof(struct nosave_region), 0); } region->start_pfn = start_pfn; region->end_pfn = end_pfn; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b77150ad1965..429e4a3833ca 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1111,9 +1111,9 @@ void __init setup_log_buf(int early) if (early) { new_log_buf = - memblock_virt_alloc(new_log_buf_len, LOG_ALIGN); + memblock_alloc(new_log_buf_len, LOG_ALIGN); } else { - new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, + new_log_buf = memblock_alloc_nopanic(new_log_buf_len, LOG_ALIGN); } diff --git a/lib/cpumask.c b/lib/cpumask.c index beca6244671a..1405cb22e6bc 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -163,7 +163,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var); */ void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { - *mask = memblock_virt_alloc(cpumask_size(), 0); + *mask = memblock_alloc(cpumask_size(), 0); } /** diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7b5c0ad9a6bd..51e9f17dbd5c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2100,7 +2100,7 @@ int __alloc_bootmem_huge_page(struct hstate *h) for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) { void *addr; - addr = memblock_virt_alloc_try_nid_raw( + addr = memblock_alloc_try_nid_raw( huge_page_size(h), huge_page_size(h), 0, BOOTMEM_ALLOC_ACCESSIBLE, node); if (addr) { diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 7a2a2f13f86f..24d734bdff6b 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -83,7 +83,7 @@ static inline bool kasan_zero_page_entry(pte_t pte) static __init void *early_alloc(size_t size, int node) { - return memblock_virt_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), + return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, node); } diff --git a/mm/memblock.c b/mm/memblock.c index 20358374e8a8..58340de3ebc6 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1319,7 +1319,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali } /** - * memblock_virt_alloc_internal - allocate boot memory block + * memblock_alloc_internal - allocate boot memory block * @size: size of memory block to be allocated in bytes * @align: alignment of the region and block's size * @min_addr: the lower bound of the memory region to allocate (phys address) @@ -1345,7 +1345,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali * Return: * Virtual address of allocated memory block on success, NULL on failure. */ -static void * __init memblock_virt_alloc_internal( +static void * __init memblock_alloc_internal( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid) @@ -1412,7 +1412,7 @@ done: } /** - * memblock_virt_alloc_try_nid_raw - allocate boot memory block without zeroing + * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing * memory and without panicking * @size: size of memory block to be allocated in bytes * @align: alignment of the region and block's size @@ -1430,7 +1430,7 @@ done: * Return: * Virtual address of allocated memory block on success, NULL on failure. */ -void * __init memblock_virt_alloc_try_nid_raw( +void * __init memblock_alloc_try_nid_raw( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid) @@ -1441,7 +1441,7 @@ void * __init memblock_virt_alloc_try_nid_raw( __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr, (void *)_RET_IP_); - ptr = memblock_virt_alloc_internal(size, align, + ptr = memblock_alloc_internal(size, align, min_addr, max_addr, nid); if (ptr && size > 0) page_init_poison(ptr, size); @@ -1450,7 +1450,7 @@ void * __init memblock_virt_alloc_try_nid_raw( } /** - * memblock_virt_alloc_try_nid_nopanic - allocate boot memory block + * memblock_alloc_try_nid_nopanic - allocate boot memory block * @size: size of memory block to be allocated in bytes * @align: alignment of the region and block's size * @min_addr: the lower bound of the memory region from where the allocation @@ -1466,7 +1466,7 @@ void * __init memblock_virt_alloc_try_nid_raw( * Return: * Virtual address of allocated memory block on success, NULL on failure. */ -void * __init memblock_virt_alloc_try_nid_nopanic( +void * __init memblock_alloc_try_nid_nopanic( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid) @@ -1477,7 +1477,7 @@ void * __init memblock_virt_alloc_try_nid_nopanic( __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr, (void *)_RET_IP_); - ptr = memblock_virt_alloc_internal(size, align, + ptr = memblock_alloc_internal(size, align, min_addr, max_addr, nid); if (ptr) memset(ptr, 0, size); @@ -1485,7 +1485,7 @@ void * __init memblock_virt_alloc_try_nid_nopanic( } /** - * memblock_virt_alloc_try_nid - allocate boot memory block with panicking + * memblock_alloc_try_nid - allocate boot memory block with panicking * @size: size of memory block to be allocated in bytes * @align: alignment of the region and block's size * @min_addr: the lower bound of the memory region from where the allocation @@ -1495,14 +1495,14 @@ void * __init memblock_virt_alloc_try_nid_nopanic( * allocate only from memory limited by memblock.current_limit value * @nid: nid of the free area to find, %NUMA_NO_NODE for any node * - * Public panicking version of memblock_virt_alloc_try_nid_nopanic() + * Public panicking version of memblock_alloc_try_nid_nopanic() * which provides debug information (including caller info), if enabled, * and panics if the request can not be satisfied. * * Return: * Virtual address of allocated memory block on success, NULL on failure. */ -void * __init memblock_virt_alloc_try_nid( +void * __init memblock_alloc_try_nid( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid) @@ -1512,7 +1512,7 @@ void * __init memblock_virt_alloc_try_nid( memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n", __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr, (void *)_RET_IP_); - ptr = memblock_virt_alloc_internal(size, align, + ptr = memblock_alloc_internal(size, align, min_addr, max_addr, nid); if (ptr) { memset(ptr, 0, size); @@ -1529,7 +1529,7 @@ void * __init memblock_virt_alloc_try_nid( * @base: phys starting address of the boot memory block * @size: size of the boot memory block in bytes * - * Free boot memory block previously allocated by memblock_virt_alloc_xx() API. + * Free boot memory block previously allocated by memblock_alloc_xx() API. * The freeing memory will not be released to the buddy allocator. */ void __init __memblock_free_early(phys_addr_t base, phys_addr_t size) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 59d171f84445..8ca6954fdcdc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6209,7 +6209,7 @@ static void __ref setup_usemap(struct pglist_data *pgdat, zone->pageblock_flags = NULL; if (usemapsize) zone->pageblock_flags = - memblock_virt_alloc_node_nopanic(usemapsize, + memblock_alloc_node_nopanic(usemapsize, pgdat->node_id); } #else @@ -6439,7 +6439,7 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) end = pgdat_end_pfn(pgdat); end = ALIGN(end, MAX_ORDER_NR_PAGES); size = (end - start) * sizeof(struct page); - map = memblock_virt_alloc_node_nopanic(size, pgdat->node_id); + map = memblock_alloc_node_nopanic(size, pgdat->node_id); pgdat->node_mem_map = map + offset; } pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", @@ -7711,9 +7711,9 @@ void *__init alloc_large_system_hash(const char *tablename, size = bucketsize << log2qty; if (flags & HASH_EARLY) { if (flags & HASH_ZERO) - table = memblock_virt_alloc_nopanic(size, 0); + table = memblock_alloc_nopanic(size, 0); else - table = memblock_virt_alloc_raw(size, 0); + table = memblock_alloc_raw(size, 0); } else if (hashdist) { table = __vmalloc(size, gfp_flags, PAGE_KERNEL); } else { diff --git a/mm/page_ext.c b/mm/page_ext.c index a9826da84ccb..e77c0f031dd0 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -161,7 +161,7 @@ static int __init alloc_node_page_ext(int nid) table_size = get_entry_size() * nr_pages; - base = memblock_virt_alloc_try_nid_nopanic( + base = memblock_alloc_try_nid_nopanic( table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); if (!base) diff --git a/mm/percpu.c b/mm/percpu.c index 4b90682623e9..3050c1d37d37 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1101,7 +1101,7 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, region_size = ALIGN(start_offset + map_size, lcm_align); /* allocate chunk */ - chunk = memblock_virt_alloc(sizeof(struct pcpu_chunk) + + chunk = memblock_alloc(sizeof(struct pcpu_chunk) + BITS_TO_LONGS(region_size >> PAGE_SHIFT), 0); @@ -1114,11 +1114,11 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, chunk->nr_pages = region_size >> PAGE_SHIFT; region_bits = pcpu_chunk_map_bits(chunk); - chunk->alloc_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits) * + chunk->alloc_map = memblock_alloc(BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]), 0); - chunk->bound_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits + 1) * + chunk->bound_map = memblock_alloc(BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]), 0); - chunk->md_blocks = memblock_virt_alloc(pcpu_chunk_nr_blocks(chunk) * + chunk->md_blocks = memblock_alloc(pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]), 0); pcpu_init_md_blocks(chunk); @@ -1888,7 +1888,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, __alignof__(ai->groups[0].cpu_map[0])); ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); - ptr = memblock_virt_alloc_nopanic(PFN_ALIGN(ai_size), PAGE_SIZE); + ptr = memblock_alloc_nopanic(PFN_ALIGN(ai_size), PAGE_SIZE); if (!ptr) return NULL; ai = ptr; @@ -2075,12 +2075,12 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0); /* process group information and build config tables accordingly */ - group_offsets = memblock_virt_alloc(ai->nr_groups * + group_offsets = memblock_alloc(ai->nr_groups * sizeof(group_offsets[0]), 0); - group_sizes = memblock_virt_alloc(ai->nr_groups * + group_sizes = memblock_alloc(ai->nr_groups * sizeof(group_sizes[0]), 0); - unit_map = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0); - unit_off = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0); + unit_map = memblock_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0); + unit_off = memblock_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0); for (cpu = 0; cpu < nr_cpu_ids; cpu++) unit_map[cpu] = UINT_MAX; @@ -2144,7 +2144,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, * empty chunks. */ pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2; - pcpu_slot = memblock_virt_alloc( + pcpu_slot = memblock_alloc( pcpu_nr_slots * sizeof(pcpu_slot[0]), 0); for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_slot[i]); @@ -2458,7 +2458,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *)); - areas = memblock_virt_alloc_nopanic(areas_size, 0); + areas = memblock_alloc_nopanic(areas_size, 0); if (!areas) { rc = -ENOMEM; goto out_free; @@ -2599,7 +2599,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, /* unaligned allocations can't be freed, round up to page size */ pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * sizeof(pages[0])); - pages = memblock_virt_alloc(pages_size, 0); + pages = memblock_alloc(pages_size, 0); /* allocate pages */ j = 0; @@ -2688,7 +2688,7 @@ EXPORT_SYMBOL(__per_cpu_offset); static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, size_t align) { - return memblock_virt_alloc_from_nopanic( + return memblock_alloc_from_nopanic( size, align, __pa(MAX_DMA_ADDRESS)); } @@ -2737,7 +2737,7 @@ void __init setup_per_cpu_areas(void) void *fc; ai = pcpu_alloc_alloc_info(1, 1); - fc = memblock_virt_alloc_from_nopanic(unit_size, + fc = memblock_alloc_from_nopanic(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!ai || !fc) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 8301293331a2..91c2c3d25827 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -42,7 +42,7 @@ static void * __ref __earlyonly_bootmem_alloc(int node, unsigned long align, unsigned long goal) { - return memblock_virt_alloc_try_nid_raw(size, align, goal, + return memblock_alloc_try_nid_raw(size, align, goal, BOOTMEM_ALLOC_ACCESSIBLE, node); } diff --git a/mm/sparse.c b/mm/sparse.c index 67ad061f7fb8..cb900dda7fd2 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -68,7 +68,7 @@ static noinline struct mem_section __ref *sparse_index_alloc(int nid) if (slab_is_available()) section = kzalloc_node(array_size, GFP_KERNEL, nid); else - section = memblock_virt_alloc_node(array_size, nid); + section = memblock_alloc_node(array_size, nid); return section; } @@ -216,7 +216,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end) size = sizeof(struct mem_section*) * NR_SECTION_ROOTS; align = 1 << (INTERNODE_CACHE_SHIFT); - mem_section = memblock_virt_alloc(size, align); + mem_section = memblock_alloc(size, align); } #endif @@ -306,7 +306,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, limit = goal + (1UL << PA_SECTION_SHIFT); nid = early_pfn_to_nid(goal >> PAGE_SHIFT); again: - p = memblock_virt_alloc_try_nid_nopanic(size, + p = memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES, goal, limit, nid); if (!p && limit) { @@ -362,7 +362,7 @@ static unsigned long * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsigned long size) { - return memblock_virt_alloc_node_nopanic(size, pgdat->node_id); + return memblock_alloc_node_nopanic(size, pgdat->node_id); } static void __init check_usemap_section_nr(int nid, unsigned long *usemap) @@ -391,7 +391,7 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid, if (map) return map; - map = memblock_virt_alloc_try_nid(size, + map = memblock_alloc_try_nid(size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); return map; @@ -405,7 +405,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid) { WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */ sparsemap_buf = - memblock_virt_alloc_try_nid_raw(size, PAGE_SIZE, + memblock_alloc_try_nid_raw(size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); sparsemap_buf_end = sparsemap_buf + size; -- cgit v1.2.3 From 57c8a661d95dff48dd9c2f2496139082bbaf241a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:09:49 -0700 Subject: mm: remove include/linux/bootmem.h Move remaining definitions and declarations from include/linux/bootmem.h into include/linux/memblock.h and remove the redundant header. The includes were replaced with the semantic patch below and then semi-automated removal of duplicated '#include @@ @@ - #include + #include [sfr@canb.auug.org.au: dma-direct: fix up for the removal of linux/bootmem.h] Link: http://lkml.kernel.org/r/20181002185342.133d1680@canb.auug.org.au [sfr@canb.auug.org.au: powerpc: fix up for removal of linux/bootmem.h] Link: http://lkml.kernel.org/r/20181005161406.73ef8727@canb.auug.org.au [sfr@canb.auug.org.au: x86/kaslr, ACPI/NUMA: fix for linux/bootmem.h removal] Link: http://lkml.kernel.org/r/20181008190341.5e396491@canb.auug.org.au Link: http://lkml.kernel.org/r/1536927045-23536-30-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Signed-off-by: Stephen Rothwell Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/core_cia.c | 2 +- arch/alpha/kernel/core_irongate.c | 1 - arch/alpha/kernel/core_marvel.c | 2 +- arch/alpha/kernel/core_titan.c | 2 +- arch/alpha/kernel/core_tsunami.c | 2 +- arch/alpha/kernel/pci-noop.c | 2 +- arch/alpha/kernel/pci.c | 2 +- arch/alpha/kernel/pci_iommu.c | 2 +- arch/alpha/kernel/setup.c | 1 - arch/alpha/kernel/sys_nautilus.c | 2 +- arch/alpha/mm/init.c | 2 +- arch/alpha/mm/numa.c | 1 - arch/arc/kernel/unwind.c | 2 +- arch/arc/mm/highmem.c | 2 +- arch/arc/mm/init.c | 1 - arch/arm/kernel/devtree.c | 1 - arch/arm/kernel/setup.c | 1 - arch/arm/mach-omap2/omap_hwmod.c | 2 +- arch/arm/mm/dma-mapping.c | 1 - arch/arm/mm/init.c | 1 - arch/arm/xen/mm.c | 1 - arch/arm/xen/p2m.c | 2 +- arch/arm64/kernel/acpi.c | 1 - arch/arm64/kernel/acpi_numa.c | 1 - arch/arm64/kernel/setup.c | 1 - arch/arm64/mm/dma-mapping.c | 2 +- arch/arm64/mm/init.c | 1 - arch/arm64/mm/kasan_init.c | 1 - arch/arm64/mm/numa.c | 1 - arch/c6x/kernel/setup.c | 1 - arch/c6x/mm/init.c | 2 +- arch/h8300/kernel/setup.c | 1 - arch/h8300/mm/init.c | 2 +- arch/hexagon/kernel/dma.c | 2 +- arch/hexagon/kernel/setup.c | 2 +- arch/hexagon/mm/init.c | 1 - arch/ia64/kernel/crash.c | 2 +- arch/ia64/kernel/efi.c | 2 +- arch/ia64/kernel/ia64_ksyms.c | 2 +- arch/ia64/kernel/iosapic.c | 2 +- arch/ia64/kernel/mca.c | 2 +- arch/ia64/kernel/mca_drv.c | 2 +- arch/ia64/kernel/setup.c | 1 - arch/ia64/kernel/smpboot.c | 2 +- arch/ia64/kernel/topology.c | 2 +- arch/ia64/kernel/unwind.c | 2 +- arch/ia64/mm/contig.c | 1 - arch/ia64/mm/discontig.c | 1 - arch/ia64/mm/init.c | 1 - arch/ia64/mm/numa.c | 2 +- arch/ia64/mm/tlb.c | 2 +- arch/ia64/pci/pci.c | 2 +- arch/ia64/sn/kernel/bte.c | 2 +- arch/ia64/sn/kernel/io_common.c | 2 +- arch/ia64/sn/kernel/setup.c | 2 +- arch/m68k/atari/stram.c | 2 +- arch/m68k/coldfire/m54xx.c | 2 +- arch/m68k/kernel/setup_mm.c | 1 - arch/m68k/kernel/setup_no.c | 1 - arch/m68k/kernel/uboot.c | 2 +- arch/m68k/mm/init.c | 2 +- arch/m68k/mm/mcfmmu.c | 1 - arch/m68k/mm/motorola.c | 1 - arch/m68k/mm/sun3mmu.c | 2 +- arch/m68k/sun3/config.c | 2 +- arch/m68k/sun3/dvma.c | 2 +- arch/m68k/sun3/mmu_emu.c | 2 +- arch/m68k/sun3/sun3dvma.c | 2 +- arch/m68k/sun3x/dvma.c | 2 +- arch/microblaze/mm/consistent.c | 2 +- arch/microblaze/mm/init.c | 3 +- arch/microblaze/pci/pci-common.c | 2 +- arch/mips/ar7/memory.c | 2 +- arch/mips/ath79/setup.c | 2 +- arch/mips/bcm63xx/prom.c | 2 +- arch/mips/bcm63xx/setup.c | 2 +- arch/mips/bmips/setup.c | 2 +- arch/mips/cavium-octeon/dma-octeon.c | 2 +- arch/mips/dec/prom/memory.c | 2 +- arch/mips/emma/common/prom.c | 2 +- arch/mips/fw/arc/memory.c | 2 +- arch/mips/jazz/jazzdma.c | 2 +- arch/mips/kernel/crash.c | 2 +- arch/mips/kernel/crash_dump.c | 2 +- arch/mips/kernel/prom.c | 2 +- arch/mips/kernel/setup.c | 1 - arch/mips/kernel/traps.c | 1 - arch/mips/kernel/vpe.c | 2 +- arch/mips/kvm/commpage.c | 2 +- arch/mips/kvm/dyntrans.c | 2 +- arch/mips/kvm/emulate.c | 2 +- arch/mips/kvm/interrupt.c | 2 +- arch/mips/kvm/mips.c | 2 +- arch/mips/lantiq/prom.c | 2 +- arch/mips/lasat/prom.c | 2 +- arch/mips/loongson64/common/init.c | 2 +- arch/mips/loongson64/loongson-3/numa.c | 1 - arch/mips/mm/init.c | 2 +- arch/mips/mm/pgtable-32.c | 2 +- arch/mips/mti-malta/malta-memory.c | 2 +- arch/mips/netlogic/xlp/dt.c | 2 +- arch/mips/pci/pci-legacy.c | 2 +- arch/mips/pci/pci.c | 2 +- arch/mips/ralink/of.c | 2 +- arch/mips/rb532/prom.c | 2 +- arch/mips/sgi-ip27/ip27-memory.c | 1 - arch/mips/sibyte/common/cfe.c | 2 +- arch/mips/sibyte/swarm/setup.c | 2 +- arch/mips/txx9/rbtx4938/prom.c | 2 +- arch/nds32/kernel/setup.c | 3 +- arch/nds32/mm/highmem.c | 2 +- arch/nds32/mm/init.c | 3 +- arch/nios2/kernel/prom.c | 2 +- arch/nios2/kernel/setup.c | 1 - arch/nios2/mm/init.c | 2 +- arch/openrisc/kernel/setup.c | 3 +- arch/openrisc/mm/init.c | 3 +- arch/parisc/mm/init.c | 1 - arch/powerpc/kernel/pci_32.c | 2 +- arch/powerpc/kernel/setup-common.c | 1 - arch/powerpc/kernel/setup_64.c | 3 +- arch/powerpc/lib/alloc.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 1 - arch/powerpc/mm/mem.c | 3 +- arch/powerpc/mm/mmu_context_nohash.c | 2 +- arch/powerpc/mm/numa.c | 3 +- arch/powerpc/platforms/powermac/nvram.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 3 +- arch/powerpc/platforms/ps3/setup.c | 2 +- arch/powerpc/sysdev/msi_bitmap.c | 2 +- arch/riscv/mm/init.c | 3 +- arch/s390/kernel/crash_dump.c | 3 +- arch/s390/kernel/setup.c | 1 - arch/s390/kernel/smp.c | 3 +- arch/s390/kernel/topology.c | 2 +- arch/s390/kernel/vdso.c | 2 +- arch/s390/mm/extmem.c | 2 +- arch/s390/mm/init.c | 3 +- arch/s390/mm/vmem.c | 3 +- arch/s390/numa/mode_emu.c | 1 - arch/s390/numa/numa.c | 1 - arch/s390/numa/toptree.c | 2 +- arch/sh/mm/init.c | 3 +- arch/sh/mm/ioremap_fixed.c | 2 +- arch/sparc/kernel/mdesc.c | 2 - arch/sparc/kernel/prom_32.c | 2 +- arch/sparc/kernel/setup_64.c | 2 +- arch/sparc/kernel/smp_64.c | 2 +- arch/sparc/mm/init_32.c | 1 - arch/sparc/mm/init_64.c | 3 +- arch/sparc/mm/srmmu.c | 2 +- arch/um/drivers/net_kern.c | 2 +- arch/um/drivers/vector_kern.c | 2 +- arch/um/kernel/initrd.c | 2 +- arch/um/kernel/mem.c | 1 - arch/um/kernel/physmem.c | 1 - arch/unicore32/kernel/hibernate.c | 2 +- arch/unicore32/kernel/setup.c | 3 +- arch/unicore32/mm/init.c | 3 +- arch/unicore32/mm/mmu.c | 1 - arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/acpi/sleep.c | 1 - arch/x86/kernel/apic/apic.c | 2 +- arch/x86/kernel/apic/io_apic.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/e820.c | 3 +- arch/x86/kernel/mpparse.c | 1 - arch/x86/kernel/pci-dma.c | 2 +- arch/x86/kernel/pci-swiotlb.c | 2 +- arch/x86/kernel/pvclock.c | 2 +- arch/x86/kernel/setup.c | 1 - arch/x86/kernel/setup_percpu.c | 1 - arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tce_64.c | 1 - arch/x86/mm/amdtopology.c | 1 - arch/x86/mm/fault.c | 2 +- arch/x86/mm/highmem_32.c | 2 +- arch/x86/mm/init.c | 1 - arch/x86/mm/init_32.c | 1 - arch/x86/mm/init_64.c | 1 - arch/x86/mm/ioremap.c | 2 +- arch/x86/mm/kasan_init_64.c | 3 +- arch/x86/mm/kaslr.c | 1 + arch/x86/mm/numa.c | 1 - arch/x86/mm/numa_32.c | 1 - arch/x86/mm/numa_64.c | 2 +- arch/x86/mm/numa_emulation.c | 1 - arch/x86/mm/pageattr-test.c | 2 +- arch/x86/mm/pageattr.c | 2 +- arch/x86/mm/pat.c | 2 +- arch/x86/mm/physaddr.c | 2 +- arch/x86/pci/i386.c | 2 +- arch/x86/platform/efi/efi.c | 3 +- arch/x86/platform/efi/efi_64.c | 2 +- arch/x86/platform/efi/quirks.c | 1 - arch/x86/platform/olpc/olpc_dt.c | 2 +- arch/x86/power/hibernate_32.c | 2 +- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/enlighten_pv.c | 3 +- arch/x86/xen/p2m.c | 1 - arch/xtensa/kernel/pci.c | 2 +- arch/xtensa/mm/cache.c | 2 +- arch/xtensa/mm/init.c | 2 +- arch/xtensa/mm/kasan_init.c | 3 +- arch/xtensa/mm/mmu.c | 2 +- arch/xtensa/platforms/iss/network.c | 2 +- arch/xtensa/platforms/iss/setup.c | 2 +- block/blk-settings.c | 2 +- block/bounce.c | 2 +- drivers/acpi/numa.c | 1 - drivers/acpi/tables.c | 3 +- drivers/base/platform.c | 2 +- drivers/clk/ti/clk.c | 2 +- drivers/firmware/dmi_scan.c | 2 +- drivers/firmware/efi/apple-properties.c | 2 +- drivers/firmware/iscsi_ibft_find.c | 2 +- drivers/firmware/memmap.c | 2 +- drivers/iommu/mtk_iommu.c | 2 +- drivers/iommu/mtk_iommu_v1.c | 2 +- drivers/macintosh/smu.c | 3 +- drivers/mtd/ar7part.c | 2 +- drivers/net/arcnet/arc-rimi.c | 2 +- drivers/net/arcnet/com20020-isa.c | 2 +- drivers/net/arcnet/com90io.c | 2 +- drivers/of/fdt.c | 1 - drivers/of/unittest.c | 2 +- drivers/s390/char/fs3270.c | 2 +- drivers/s390/char/tty3270.c | 2 +- drivers/s390/cio/cmf.c | 2 +- drivers/s390/virtio/virtio_ccw.c | 2 +- drivers/sfi/sfi_core.c | 2 +- drivers/tty/serial/cpm_uart/cpm_uart_core.c | 2 +- drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c | 2 +- drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c | 2 +- drivers/usb/early/xhci-dbc.c | 1 - drivers/xen/balloon.c | 2 +- drivers/xen/events/events_base.c | 2 +- drivers/xen/grant-table.c | 2 +- drivers/xen/swiotlb-xen.c | 1 - drivers/xen/xen-selfballoon.c | 2 +- fs/dcache.c | 2 +- fs/inode.c | 2 +- fs/namespace.c | 2 +- fs/proc/kcore.c | 2 +- fs/proc/page.c | 2 +- fs/proc/vmcore.c | 2 +- include/linux/bootmem.h | 173 ---------------------------- include/linux/memblock.h | 151 +++++++++++++++++++++++- init/main.c | 2 +- kernel/dma/direct.c | 2 +- kernel/dma/swiotlb.c | 2 +- kernel/futex.c | 2 +- kernel/locking/qspinlock_paravirt.h | 2 +- kernel/pid.c | 2 +- kernel/power/snapshot.c | 2 +- kernel/printk/printk.c | 1 - kernel/profile.c | 2 +- lib/cpumask.c | 2 +- mm/hugetlb.c | 1 - mm/kasan/kasan_init.c | 3 +- mm/kmemleak.c | 2 +- mm/memblock.c | 1 - mm/memory_hotplug.c | 1 - mm/page_alloc.c | 1 - mm/page_ext.c | 2 +- mm/page_idle.c | 2 +- mm/page_owner.c | 2 +- mm/percpu.c | 2 +- mm/sparse-vmemmap.c | 1 - mm/sparse.c | 1 - net/ipv4/inet_hashtables.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/udp.c | 2 +- net/sctp/protocol.c | 2 +- net/xfrm/xfrm_hash.c | 2 +- 275 files changed, 353 insertions(+), 476 deletions(-) delete mode 100644 include/linux/bootmem.h (limited to 'kernel') diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c index 026ee955fd10..867e8730b0c5 100644 --- a/arch/alpha/kernel/core_cia.c +++ b/arch/alpha/kernel/core_cia.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c index 35572be9deb5..a9fd133a7fb2 100644 --- a/arch/alpha/kernel/core_irongate.c +++ b/arch/alpha/kernel/core_irongate.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 1f00c9433b10..8a568c4d8e81 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 132b06bdf903..97551597581b 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c index e7c956ea46b6..f334b8928d72 100644 --- a/arch/alpha/kernel/core_tsunami.c +++ b/arch/alpha/kernel/core_tsunami.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index 59cbfc2bf2c5..a9378ee0c2f1 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 4cc3eb92f55b..13937e72d875 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 5d178c7ba5b2..82cf950bda2a 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 64c06a0adf3d..a37fd990bd55 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index ff4f54b86c7f..cd9a112d67ff 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 853d15344934..a42fc5c4db89 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -19,7 +19,7 @@ #include #include #include -#include /* max_low_pfn */ +#include /* max_low_pfn */ #include #include diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 26cd925d19b1..74846553e3f1 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 2a01dd1005f4..d34f69eb1a95 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c index f582dc8944c9..48e700151810 100644 --- a/arch/arc/mm/highmem.c +++ b/arch/arc/mm/highmem.c @@ -7,7 +7,7 @@ * */ -#include +#include #include #include #include diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 0f29c6548779..f8fe5668b30f 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -8,7 +8,6 @@ #include #include -#include #include #ifdef CONFIG_BLK_DEV_INITRD #include diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 13bcd3b867cb..e3057c1b55b9 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 39e6090d23ac..840a4adc69fc 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 1f9b34a7eccd..cd5732ab0cdf 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -141,7 +141,7 @@ #include #include #include -#include +#include #include diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 66566472c153..661fe48ab78d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -9,7 +9,6 @@ * * DMA uncached mapping support. */ -#include #include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d421a10c93a8..32e4845af2b6 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 785d2a562a23..cb44aa290e73 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 0641ba54ab62..e70a49fc8dcd 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index ed46dc188b22..44e3c351e1ea 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -16,7 +16,6 @@ #define pr_fmt(fmt) "ACPI: " fmt #include -#include #include #include #include diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index 4f4f1815e047..eac1d0cc595c 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3428427f6c93..7ce7306f1d75 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index d190612b8f33..3a703e5d4e32 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d8d73073835f..9d9582cac6c4 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 6a65a2912d36..63527e585aac 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -11,7 +11,6 @@ */ #define pr_fmt(fmt) "kasan: " fmt -#include #include #include #include diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 0bff116c07a8..27a31efd9e8e 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -20,7 +20,6 @@ #define pr_fmt(fmt) "NUMA: " fmt #include -#include #include #include #include diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c index 05d96a9541b5..2e1c0ea22eb0 100644 --- a/arch/c6x/kernel/setup.c +++ b/arch/c6x/kernel/setup.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index 3383df8b3508..af5ada0520be 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #ifdef CONFIG_BLK_DEV_RAM #include #endif diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index 34e2df5c0d6d..b32bfa1fe99e 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index f2bf4487aabd..6519252ac4db 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 706699374444..38eaa7b703e7 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c index dc8c7e75b5d1..b3c3e04d4e57 100644 --- a/arch/hexagon/kernel/setup.c +++ b/arch/hexagon/kernel/setup.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 88643faf3981..1719ede9e9bd 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -20,7 +20,6 @@ #include #include -#include #include #include #include diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 39f4433a6f0e..bec762a9b418 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index f77d80edddfe..8f106638913c 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -23,7 +23,7 @@ * Skip non-WB memory and ignore empty memory ranges. */ #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 6b51c88e3578..b49fe6f618ed 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -6,7 +6,7 @@ #ifdef CONFIG_VIRTUAL_MEM_MAP #include #include -#include +#include EXPORT_SYMBOL(min_low_pfn); /* defined by bootmem.c, but not exported by generic code */ EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic code */ #endif diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 550243a94b5d..fe6e4946672e 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -90,7 +90,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 71209766c47f..9a6603f8e409 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -77,7 +77,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index dfe40cbdf3b3..45f956ad715a 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 0e6c2d9fb498..583a3746d70b 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -27,7 +27,6 @@ #include #include -#include #include #include #include diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 74fe317477e6..51ec944b036c 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 9b820f7a6a98..e311ee13e61d 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index e04efa088902..7601fe0622d2 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -28,7 +28,7 @@ * acquired, then the read-write lock must be acquired first. */ #include -#include +#include #include #include #include diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 9e5c23a6b8b4..6e447234205c 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -14,7 +14,6 @@ * Routines used by ia64 machines with contiguous (or virtually contiguous) * memory. */ -#include #include #include #include diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 70609f823960..8a965784340c 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 43ea4a47163d..d5e12ff1d73c 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -8,7 +8,6 @@ #include #include -#include #include #include #include diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index aa19b7ac8222..3861d6e32d5f 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 5554863b4c9b..ab545daff7c3 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 5d71800df431..196a0dd7ff97 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 9146192b86f5..9900e6d4add6 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c index 8b05d5581615..98f55220c67d 100644 --- a/arch/ia64/sn/kernel/io_common.c +++ b/arch/ia64/sn/kernel/io_common.c @@ -6,7 +6,7 @@ * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved. */ -#include +#include #include #include #include diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index ab2564f95199..71ad6b0ccab4 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c index 1089d67df315..6ffc204eb07d 100644 --- a/arch/m68k/atari/stram.c +++ b/arch/m68k/atari/stram.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/coldfire/m54xx.c b/arch/m68k/coldfire/m54xx.c index adad03ca6e11..360c723c0ae6 100644 --- a/arch/m68k/coldfire/m54xx.c +++ b/arch/m68k/coldfire/m54xx.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 5d3596c180f9..a1a3eaeaf58c 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index cfd5475bfc31..3c5def10d486 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/kernel/uboot.c b/arch/m68k/kernel/uboot.c index 107082877064..1b4c562753da 100644 --- a/arch/m68k/kernel/uboot.c +++ b/arch/m68k/kernel/uboot.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index ae49ae4d3049..933c33e76a48 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 38a1d92dd555..0de4999a3810 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 2113eec8dbf9..7497cf30bf1c 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c index 19c05ab9824d..f736db48a2e1 100644 --- a/arch/m68k/mm/sun3mmu.c +++ b/arch/m68k/mm/sun3mmu.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c index 79a2bb857906..542c4404861c 100644 --- a/arch/m68k/sun3/config.c +++ b/arch/m68k/sun3/config.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/m68k/sun3/dvma.c b/arch/m68k/sun3/dvma.c index 5f92c72b05c3..a2c1c9304895 100644 --- a/arch/m68k/sun3/dvma.c +++ b/arch/m68k/sun3/dvma.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c index d30da12a1702..582a1284059a 100644 --- a/arch/m68k/sun3/mmu_emu.c +++ b/arch/m68k/sun3/mmu_emu.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c index 72d94585b52e..8be8b750c629 100644 --- a/arch/m68k/sun3/sun3dvma.c +++ b/arch/m68k/sun3/sun3dvma.c @@ -7,7 +7,7 @@ * Contains common routines for sun3/sun3x DVMA management. */ -#include +#include #include #include #include diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c index b2acbc862f60..89e630e66555 100644 --- a/arch/m68k/sun3x/dvma.c +++ b/arch/m68k/sun3x/dvma.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index d801cc5f5b95..45e0a1aa9357 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 9989740d397a..8c14988f52f2 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -7,10 +7,9 @@ * for more details. */ -#include +#include #include #include -#include #include /* mem_init */ #include #include diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 2ffd171af8b6..6b89a66ec1a5 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/ar7/memory.c b/arch/mips/ar7/memory.c index 0332f0514d05..80390a9ec264 100644 --- a/arch/mips/ar7/memory.c +++ b/arch/mips/ar7/memory.c @@ -16,7 +16,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include +#include #include #include #include diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 4c7a93f4039a..9728abcb18fa 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c index 7019e2967009..77a836e661c9 100644 --- a/arch/mips/bcm63xx/prom.c +++ b/arch/mips/bcm63xx/prom.c @@ -7,7 +7,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/mips/bcm63xx/setup.c b/arch/mips/bcm63xx/setup.c index 2be9caaa2085..e28ee9a7cc7e 100644 --- a/arch/mips/bcm63xx/setup.c +++ b/arch/mips/bcm63xx/setup.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index 6329c5f780d6..1738a06396f9 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index c44c1a654471..e8eb60ed99f2 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -11,7 +11,7 @@ * Copyright (C) 2010 Cavium Networks, Inc. */ #include -#include +#include #include #include #include diff --git a/arch/mips/dec/prom/memory.c b/arch/mips/dec/prom/memory.c index a2acc6454cf3..5073d2ed78bb 100644 --- a/arch/mips/dec/prom/memory.c +++ b/arch/mips/dec/prom/memory.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/emma/common/prom.c b/arch/mips/emma/common/prom.c index cae42259d6da..675337b8a4a0 100644 --- a/arch/mips/emma/common/prom.c +++ b/arch/mips/emma/common/prom.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c index dd9496f26e6a..429b7f8d2aeb 100644 --- a/arch/mips/fw/arc/memory.c +++ b/arch/mips/fw/arc/memory.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 0a0aaf39fd16..4c41ed0a637e 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c index 2c7288041a99..81845ba04835 100644 --- a/arch/mips/kernel/crash.c +++ b/arch/mips/kernel/crash.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/crash_dump.c b/arch/mips/kernel/crash_dump.c index a8657d29c62e..01b2bd95ba1f 100644 --- a/arch/mips/kernel/crash_dump.c +++ b/arch/mips/kernel/crash_dump.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index 89950b7bf536..93b8e0b4332f 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 31522d3bc8bf..41c1683761bb 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 623dc18f7f2f..0f852e1b5891 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 0bef238d2c0c..6176b9acba95 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kvm/commpage.c b/arch/mips/kvm/commpage.c index f43629979a0e..5812e6145801 100644 --- a/arch/mips/kvm/commpage.c +++ b/arch/mips/kvm/commpage.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kvm/dyntrans.c b/arch/mips/kvm/dyntrans.c index f8e772564d74..d77b61b3d6ee 100644 --- a/arch/mips/kvm/dyntrans.c +++ b/arch/mips/kvm/dyntrans.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include "commpage.h" diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 4144bfaef137..ec9ed23bca7f 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kvm/interrupt.c b/arch/mips/kvm/interrupt.c index aa0a1a00faf6..7257e8b6f5a9 100644 --- a/arch/mips/kvm/interrupt.c +++ b/arch/mips/kvm/interrupt.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index f7ea8e21656b..1fcc4d149054 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index d984bd5c2ec5..14d4c5e2b42f 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/arch/mips/lasat/prom.c b/arch/mips/lasat/prom.c index 37b8fc5b9ac9..5ce1407de2d5 100644 --- a/arch/mips/lasat/prom.c +++ b/arch/mips/lasat/prom.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/loongson64/common/init.c b/arch/mips/loongson64/common/init.c index 6ef17120722f..c073fbcb9805 100644 --- a/arch/mips/loongson64/common/init.c +++ b/arch/mips/loongson64/common/init.c @@ -8,7 +8,7 @@ * option) any later version. */ -#include +#include #include #include #include diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 703ad4536fe0..622761878cd1 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 0893b6136498..b521d8e2d359 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index b19a3c506b1e..e2a33adc0f29 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/mips/mti-malta/malta-memory.c b/arch/mips/mti-malta/malta-memory.c index a47556723b85..868921adef1d 100644 --- a/arch/mips/mti-malta/malta-memory.c +++ b/arch/mips/mti-malta/malta-memory.c @@ -12,7 +12,7 @@ * Steven J. Hill */ #include -#include +#include #include #include diff --git a/arch/mips/netlogic/xlp/dt.c b/arch/mips/netlogic/xlp/dt.c index b5ba83f4c646..c856f2a3ea42 100644 --- a/arch/mips/netlogic/xlp/dt.c +++ b/arch/mips/netlogic/xlp/dt.c @@ -33,7 +33,7 @@ */ #include -#include +#include #include #include diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index 3c3b1e6abb53..687513880fbf 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index c2e94cf5ecda..e68b44b27c0d 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 1ada8492733b..d544e7b07f7a 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c index 6484e4a4597b..361a690facbf 100644 --- a/arch/mips/rb532/prom.c +++ b/arch/mips/rb532/prom.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index cb1f1a6a166d..d8b8444d6795 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/sibyte/common/cfe.c b/arch/mips/sibyte/common/cfe.c index 092fb2a6ec4a..12a780f251e1 100644 --- a/arch/mips/sibyte/common/cfe.c +++ b/arch/mips/sibyte/common/cfe.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index 152ca71cc2d7..3b034b7178d6 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/txx9/rbtx4938/prom.c b/arch/mips/txx9/rbtx4938/prom.c index bcb469247e8c..2b36a2ee744c 100644 --- a/arch/mips/txx9/rbtx4938/prom.c +++ b/arch/mips/txx9/rbtx4938/prom.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index 63a1a5ef5219..eacc79024879 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -2,9 +2,8 @@ // Copyright (C) 2005-2017 Andes Technology Corporation #include -#include -#include #include +#include #include #include #include diff --git a/arch/nds32/mm/highmem.c b/arch/nds32/mm/highmem.c index e17cb8a69315..022779af6148 100644 --- a/arch/nds32/mm/highmem.c +++ b/arch/nds32/mm/highmem.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c index 66d3e9cf498d..131104bd2538 100644 --- a/arch/nds32/mm/init.c +++ b/arch/nds32/mm/init.c @@ -7,12 +7,11 @@ #include #include #include -#include +#include #include #include #include #include -#include #include #include diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c index a6d4f7530247..232a36b511aa 100644 --- a/arch/nios2/kernel/prom.c +++ b/arch/nios2/kernel/prom.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index 2d0011ddd4d5..6bbd4ae2beb0 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index 12923501d94f..16cea5776b87 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index e17fcd83120f..c605bdad1746 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -30,13 +30,12 @@ #include #include #include -#include +#include #include #include #include #include #include -#include #include #include diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index 91a6a9ab7598..d157310eb377 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -26,12 +26,11 @@ #include #include #include -#include +#include #include #include #include /* for initrd_* */ #include -#include #include #include diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 7e7a3126c5e9..2d7cffcaa476 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index d39ec3a4550a..274bd1442dd9 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2b56d1f30387..93ee3703b42f 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 9216c3a7fcfc..2a51e4cc8246 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -29,10 +29,9 @@ #include #include #include -#include +#include #include #include -#include #include #include diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index bf87d6e13369..5b61704447c1 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a7226ed9cae6..8cf035e68378 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index b3fe79064a69..0a64fffabee1 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -27,12 +27,11 @@ #include #include #include -#include +#include #include #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 954f1986af4d..67b9d7b669a1 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f04f15f9d232..3a048e98a132 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -11,7 +11,7 @@ #define pr_fmt(fmt) "numa: " fmt #include -#include +#include #include #include #include @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index f45b369177a4..f3391be7c762 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 23a67b545b70..aba81cbf0b36 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -17,11 +17,10 @@ #include #include #include -#include +#include #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 12519857a33c..658bfab3350b 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 349a9ff6ca5b..2444feda831f 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index d58c111099b3..1d9bfaff60bc 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -13,9 +13,8 @@ #include #include -#include -#include #include +#include #include #include diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index d17566a8c76f..97eae3871868 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -13,10 +13,9 @@ #include #include #include -#include +#include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 781c1053a773..72dd23ef771b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 44f9a7d6450b..f82b3d3c36e2 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -20,7 +20,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include -#include +#include #include #include #include @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 799a91882a76..8992b04c0ade 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -8,7 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index ec31b48a42a5..ebe748a9f472 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 84111a43ea29..eba2def3414d 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 873f6ee1c46d..76d0708438e9 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 04638b0b9ef1..0472e27febdf 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -4,14 +4,13 @@ * Author(s): Heiko Carstens */ -#include +#include #include #include #include #include #include #include -#include #include #include #include diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c index 5a381fc8e958..bfba273c32c0 100644 --- a/arch/s390/numa/mode_emu.c +++ b/arch/s390/numa/mode_emu.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 297f5d8b0890..ae0d9e889534 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c index 7f61cc3fd4d1..71a608cd4f61 100644 --- a/arch/s390/numa/toptree.c +++ b/arch/s390/numa/toptree.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 21447f866415..c8c13c777162 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -11,12 +11,11 @@ #include #include #include -#include +#include #include #include #include #include -#include #include #include #include diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c index 927a1294c465..07e744d75fa0 100644 --- a/arch/sh/mm/ioremap_fixed.c +++ b/arch/sh/mm/ioremap_fixed.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index a41526bd91e2..9a26b442f820 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -5,13 +5,11 @@ */ #include #include -#include #include #include #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index 4389944735c6..d41e2a749c5d 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index de7c87b153fe..cd2825cb8420 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 6cc80d0f4b9f..4792e08ad36b 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 880714565c40..d900952bfc5f 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index a8c3453195e6..3c8aac21f426 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index b48fea5ad9ef..a6142c5abf61 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index ef19a391214f..673816880cce 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -6,7 +6,7 @@ * Licensed under the GPL. */ -#include +#include #include #include #include diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 20442d20bd09..2b4dded11a7a 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c index 844056cf313e..3678f5b05e42 100644 --- a/arch/um/kernel/initrd.c +++ b/arch/um/kernel/initrd.c @@ -4,7 +4,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 2c672a8f4571..1067469ba2ea 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 296a91a04598..5bf56af4d5b9 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -4,7 +4,6 @@ */ #include -#include #include #include #include diff --git a/arch/unicore32/kernel/hibernate.c b/arch/unicore32/kernel/hibernate.c index 9969ec374abb..29b71c68eb7c 100644 --- a/arch/unicore32/kernel/hibernate.c +++ b/arch/unicore32/kernel/hibernate.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c index 9f163f976315..b2c38b32ea57 100644 --- a/arch/unicore32/kernel/setup.c +++ b/arch/unicore32/kernel/setup.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -27,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 44fd0e8fbe87..cf4eb9481fd6 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -11,13 +11,12 @@ #include #include #include -#include +#include #include #include #include #include #include -#include #include #include #include diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c index 18b355a20f0b..040a8c279761 100644 --- a/arch/unicore32/mm/mmu.c +++ b/arch/unicore32/mm/mmu.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f5ea6415b778..7f5d212551d4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index f1915b744052..ca13851f0570 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -7,7 +7,6 @@ */ #include -#include #include #include #include diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ab731ab09f06..32b2b7a41ef5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8c7450900e0e..5fbc57e4b0b9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -47,7 +47,7 @@ #include #include /* time_after() */ #include -#include +#include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 660d0b22e962..cbbd57ae06ee 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,7 +1,7 @@ /* cpu_feature_enabled() cannot be used this early */ #define USE_EARLY_PGTABLE_L5 -#include +#include #include #include #include diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a0ec4c37265a..68ff62bffbab 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -9,11 +9,10 @@ * allocation code routines via a platform independent interface (memblock, etc.). */ #include -#include +#include #include #include #include -#include #include #include diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f1c5eb99d445..3482460d984d 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 7ba73fe0d917..f4562fcec681 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 71c0b01d93b1..bd08b9e1c9e2 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 637982efecd8..9b158b4716d2 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 7005f89bf3b2..b74e7bfed6ab 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 483412fb8a24..e8796fcd7e5a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5369d7fac797..a9134d1910b9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c index 75730ce01f8d..285aaa62d153 100644 --- a/arch/x86/kernel/tce_64.c +++ b/arch/x86/kernel/tce_64.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c index 048c761d97b0..058b2f36b3a6 100644 --- a/arch/x86/mm/amdtopology.c +++ b/arch/x86/mm/amdtopology.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b24eb4eb9984..71d4b9d4d43f 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -8,7 +8,7 @@ #include /* task_stack_*(), ... */ #include /* oops_begin/end, ... */ #include /* search_exception_tables */ -#include /* max_low_pfn */ +#include /* max_low_pfn */ #include /* NOKPROBE_SYMBOL, ... */ #include /* kmmio_handler, ... */ #include /* perf_sw_event */ diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 62915a5e0fa2..0d4bdcb84da5 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -1,7 +1,7 @@ #include #include #include /* for totalram_pages */ -#include +#include void *kmap(struct page *page) { diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index faca978ebf9d..ef99f3892e1f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -3,7 +3,6 @@ #include #include #include -#include /* for max_low_pfn */ #include #include diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 3bbe5f58a67d..49ecf5ecf6d3 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bfb0bedc21d3..5fab264948c2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 24e0920a9b25..5378d10f1d31 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -6,7 +6,7 @@ * (C) Copyright 1995 1996 Linus Torvalds */ -#include +#include #include #include #include diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 8f87499124b8..04a9cf6b034f 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -5,10 +5,9 @@ /* cpu_feature_enabled() cannot be used this early */ #define USE_EARLY_PGTABLE_L5 -#include +#include #include #include -#include #include #include #include diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 61db77b0eda9..3f452ffed7e9 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 16e37d712ffd..1308f5408bf7 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index e8a4a09e20f1..f2bd3d61e16b 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -22,7 +22,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include #include #include diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 066f3511d5f1..59d80160fa5a 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -3,7 +3,7 @@ * Generic VM initialization for x86-64 NUMA setups. * Copyright 2002,2003 Andi Kleen, SuSE Labs. */ -#include +#include #include "numa_internal.h" diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index b54d52a2d00a..a80fdd7fb40f 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include "numa_internal.h" diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index a25588ad75ef..08f8f76a4852 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -5,7 +5,7 @@ * Clears the a test pte bit on random pages in the direct mapping, * then reverts and compares page tables forwards and afterwards. */ -#include +#include #include #include #include diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 62bb30b4bd2a..f799076e3d57 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -3,7 +3,7 @@ * Thanks to Ben LaHaise for precious feedback. */ #include -#include +#include #include #include #include diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 3d0c83ef6aab..08013524fba1 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index 7f9acb68324c..bdc98150d4db 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include #include #include diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index ed4ac215305d..8cd66152cdb0 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 9061babfbc83..7ae939e353cd 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -36,9 +36,8 @@ #include #include #include -#include -#include #include +#include #include #include #include diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index e8da7f492970..cf0347f61b21 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 4b70d0f5a803..95e77a667ba5 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index 140cd76ee897..115c8e4173bb 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -17,7 +17,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index 15695e30f982..be15bdcb20df 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 67b2f31a1265..e996e8e744cb 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -#include +#include #endif #include #include diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index ec7a4209f310..2f6787fc7106 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b3e11afed25b..b06731705529 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -67,7 +67,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index 21f13e9aabe1..5ca440a74316 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 9220dcde7520..b27359e2a464 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index f7fbe6334939..9750a48f491b 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c index 1a30a258ccd0..6b95ca43aec0 100644 --- a/arch/xtensa/mm/kasan_init.c +++ b/arch/xtensa/mm/kasan_init.c @@ -8,11 +8,10 @@ * Copyright (C) 2017 Cadence Design Systems Inc. */ -#include +#include #include #include #include -#include #include #include #include diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index f33a1ff2662a..a4dcfd39bc5c 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -4,7 +4,7 @@ * * Extracted from init.c */ -#include +#include #include #include #include diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 206b9d4591e8..190846dddc67 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c index 58709e89a8ed..c14cc673976c 100644 --- a/arch/xtensa/platforms/iss/setup.c +++ b/arch/xtensa/platforms/iss/setup.c @@ -16,7 +16,7 @@ * option) any later version. * */ -#include +#include #include #include #include diff --git a/block/blk-settings.c b/block/blk-settings.c index ffd459969689..696c04c1ab6c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -6,7 +6,7 @@ #include #include #include -#include /* for max_pfn/max_low_pfn */ +#include /* for max_pfn/max_low_pfn */ #include #include #include diff --git a/block/bounce.c b/block/bounce.c index ec0d99995f5f..cf49fe02f65c 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 85167603b9c9..274699463b4f 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index a3d012b08fc5..61203eebf3a1 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -31,9 +31,8 @@ #include #include #include -#include -#include #include +#include #include #include "internal.h" diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 23cf4427f425..41b91af95afb 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index 5c54d3734daf..5b2867a33b98 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include "clock.h" diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index f2483548cde9..099d83e4e910 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c index 2b675f788b61..ac1654f74dc7 100644 --- a/drivers/firmware/efi/apple-properties.c +++ b/drivers/firmware/efi/apple-properties.c @@ -20,7 +20,7 @@ #define pr_fmt(fmt) "apple-properties: " fmt -#include +#include #include #include #include diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c index 2224f1dc074b..72d9ea18270b 100644 --- a/drivers/firmware/iscsi_ibft_find.c +++ b/drivers/firmware/iscsi_ibft_find.c @@ -18,7 +18,7 @@ * GNU General Public License for more details. */ -#include +#include #include #include #include diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 03cead6d5f97..2a23453f005a 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index f9f69f7111a9..44bd5b9166bb 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -11,7 +11,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ -#include +#include #include #include #include diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 676c029494e4..0e780848f59b 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -13,7 +13,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ -#include +#include #include #include #include diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 0069f9084f9f..880a81c82b7a 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -38,7 +38,6 @@ #include #include #include -#include #include #include diff --git a/drivers/mtd/ar7part.c b/drivers/mtd/ar7part.c index fc15ec58230a..0d33cf0842ad 100644 --- a/drivers/mtd/ar7part.c +++ b/drivers/mtd/ar7part.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index a07e24970be4..11c5bad95226 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index 38fa60ddaf2e..28510e33924f 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include "arcdevice.h" diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index 4e56aaf2b984..2c546013a980 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index ffe62a7ae19b..bb532aae0d92 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 01e23b85e798..49ae2aa744d6 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -5,7 +5,7 @@ #define pr_fmt(fmt) "### dt-test ### " fmt -#include +#include #include #include #include diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 16a4e8528bbc..8f3a2eeb28dc 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -8,7 +8,7 @@ * Copyright IBM Corp. 2003, 2009 */ -#include +#include #include #include #include diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index 5b8af2782282..2b0c36c2c568 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 8af4948dae80..72dd2471ec1e 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -13,7 +13,7 @@ #define KMSG_COMPONENT "cio" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include +#include #include #include #include diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 8f5c1d7f751a..97b6f197f007 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/sfi/sfi_core.c b/drivers/sfi/sfi_core.c index 153b3f3cc795..a5136901dd8a 100644 --- a/drivers/sfi/sfi_core.c +++ b/drivers/sfi/sfi_core.c @@ -59,7 +59,7 @@ #define KMSG_COMPONENT "SFI" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include +#include #include #include #include diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c index 79ad30d34949..b929c7ae3a27 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c b/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c index 4eba17f3d293..56fc527015cb 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c b/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c index e3bff068dc3c..6a1cd03bfe39 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index ddc5fa88f268..d2652dccc699 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index e12bb256036f..a3f5cbfcd4a1 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index e6c1934734b7..93194f3e7540 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 84575baceebc..f15f89df1f36 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -33,7 +33,7 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt -#include +#include #include #include #include diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index c5f26a87d238..2a7f545bd0b5 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -35,7 +35,6 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt -#include #include #include #include diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 55988b8418ee..5165aa82bf7d 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -68,7 +68,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/fs/dcache.c b/fs/dcache.c index c2e443fb76ae..2593153471cf 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/inode.c b/fs/inode.c index 9b808986d440..9e198f00b64c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/namespace.c b/fs/namespace.c index d86830c86ce8..98d27da43304 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index d297fe4472a9..bbcc185062bb 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/proc/page.c b/fs/proc/page.c index 792c78a49174..6c517b11acf8 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include #include #include diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 0377a104495b..3fe90443c1bb 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h deleted file mode 100644 index b58873a567b2..000000000000 --- a/include/linux/bootmem.h +++ /dev/null @@ -1,173 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 - */ -#ifndef _LINUX_BOOTMEM_H -#define _LINUX_BOOTMEM_H - -#include -#include -#include -#include - -/* - * simple boot-time physical memory area allocator. - */ - -extern unsigned long max_low_pfn; -extern unsigned long min_low_pfn; - -/* - * highest page - */ -extern unsigned long max_pfn; -/* - * highest possible page - */ -extern unsigned long long max_possible_pfn; - -extern unsigned long memblock_free_all(void); -extern void reset_node_managed_pages(pg_data_t *pgdat); -extern void reset_all_zones_managed_pages(void); - -/* We are using top down, so it is safe to use 0 here */ -#define BOOTMEM_LOW_LIMIT 0 - -#ifndef ARCH_LOW_ADDRESS_LIMIT -#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL -#endif - -/* FIXME: use MEMBLOCK_ALLOC_* variants here */ -#define BOOTMEM_ALLOC_ACCESSIBLE 0 -#define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0) - -/* FIXME: Move to memblock.h at a point where we remove nobootmem.c */ -void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, - phys_addr_t max_addr, int nid); -void *memblock_alloc_try_nid_nopanic(phys_addr_t size, - phys_addr_t align, phys_addr_t min_addr, - phys_addr_t max_addr, int nid); -void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, phys_addr_t max_addr, int nid); -void __memblock_free_early(phys_addr_t base, phys_addr_t size); -void __memblock_free_late(phys_addr_t base, phys_addr_t size); - -static inline void * __init memblock_alloc( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_raw( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid_raw(size, align, BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_from( - phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) -{ - return memblock_alloc_try_nid(size, align, min_addr, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_nopanic( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid_nopanic(size, align, - BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_low( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid(size, align, - BOOTMEM_LOW_LIMIT, - ARCH_LOW_ADDRESS_LIMIT, - NUMA_NO_NODE); -} -static inline void * __init memblock_alloc_low_nopanic( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid_nopanic(size, align, - BOOTMEM_LOW_LIMIT, - ARCH_LOW_ADDRESS_LIMIT, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_from_nopanic( - phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) -{ - return memblock_alloc_try_nid_nopanic(size, align, min_addr, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_node( - phys_addr_t size, phys_addr_t align, int nid) -{ - return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, nid); -} - -static inline void * __init memblock_alloc_node_nopanic( - phys_addr_t size, int nid) -{ - return memblock_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, - nid); -} - -static inline void __init memblock_free_early( - phys_addr_t base, phys_addr_t size) -{ - __memblock_free_early(base, size); -} - -static inline void __init memblock_free_early_nid( - phys_addr_t base, phys_addr_t size, int nid) -{ - __memblock_free_early(base, size); -} - -static inline void __init memblock_free_late( - phys_addr_t base, phys_addr_t size) -{ - __memblock_free_late(base, size); -} - -extern void *alloc_large_system_hash(const char *tablename, - unsigned long bucketsize, - unsigned long numentries, - int scale, - int flags, - unsigned int *_hash_shift, - unsigned int *_hash_mask, - unsigned long low_limit, - unsigned long high_limit); - -#define HASH_EARLY 0x00000001 /* Allocating during early boot? */ -#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min - * shift passed via *_hash_shift */ -#define HASH_ZERO 0x00000004 /* Zero allocated hash table */ - -/* Only NUMA needs hash distribution. 64bit NUMA architectures have - * sufficient vmalloc space. - */ -#ifdef CONFIG_NUMA -#define HASHDIST_DEFAULT IS_ENABLED(CONFIG_64BIT) -extern int hashdist; /* Distribute hashes across NUMA nodes? */ -#else -#define hashdist (0) -#endif - - -#endif /* _LINUX_BOOTMEM_H */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 9d46a7204975..1b4d85879cbe 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -15,6 +15,19 @@ #include #include +#include + +extern unsigned long max_low_pfn; +extern unsigned long min_low_pfn; + +/* + * highest page + */ +extern unsigned long max_pfn; +/* + * highest possible page + */ +extern unsigned long long max_possible_pfn; #define INIT_MEMBLOCK_REGIONS 128 #define INIT_PHYSMEM_REGIONS 4 @@ -119,6 +132,10 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); enum memblock_flags choose_memblock_flags(void); +unsigned long memblock_free_all(void); +void reset_node_managed_pages(pg_data_t *pgdat); +void reset_all_zones_managed_pages(void); + /* Low level functions */ int memblock_add_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size, @@ -300,11 +317,116 @@ static inline int memblock_get_region_node(const struct memblock_region *r) } #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +/* Flags for memblock allocation APIs */ +#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) +#define MEMBLOCK_ALLOC_ACCESSIBLE 0 + +/* We are using top down, so it is safe to use 0 here */ +#define MEMBLOCK_LOW_LIMIT 0 + +#ifndef ARCH_LOW_ADDRESS_LIMIT +#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL +#endif + phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_phys_alloc(phys_addr_t size, phys_addr_t align); +void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid); +void *memblock_alloc_try_nid_nopanic(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid); +void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid); + +static inline void * __init memblock_alloc(phys_addr_t size, phys_addr_t align) +{ + return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_raw(phys_addr_t size, + phys_addr_t align) +{ + return memblock_alloc_try_nid_raw(size, align, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, + NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_from(phys_addr_t size, + phys_addr_t align, + phys_addr_t min_addr) +{ + return memblock_alloc_try_nid(size, align, min_addr, + MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_nopanic(phys_addr_t size, + phys_addr_t align) +{ + return memblock_alloc_try_nid_nopanic(size, align, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, + NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_low(phys_addr_t size, + phys_addr_t align) +{ + return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, + ARCH_LOW_ADDRESS_LIMIT, NUMA_NO_NODE); +} +static inline void * __init memblock_alloc_low_nopanic(phys_addr_t size, + phys_addr_t align) +{ + return memblock_alloc_try_nid_nopanic(size, align, MEMBLOCK_LOW_LIMIT, + ARCH_LOW_ADDRESS_LIMIT, + NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_from_nopanic(phys_addr_t size, + phys_addr_t align, + phys_addr_t min_addr) +{ + return memblock_alloc_try_nid_nopanic(size, align, min_addr, + MEMBLOCK_ALLOC_ACCESSIBLE, + NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_node(phys_addr_t size, + phys_addr_t align, int nid) +{ + return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, nid); +} + +static inline void * __init memblock_alloc_node_nopanic(phys_addr_t size, + int nid) +{ + return memblock_alloc_try_nid_nopanic(size, 0, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, nid); +} + +static inline void __init memblock_free_early(phys_addr_t base, + phys_addr_t size) +{ + __memblock_free_early(base, size); +} + +static inline void __init memblock_free_early_nid(phys_addr_t base, + phys_addr_t size, int nid) +{ + __memblock_free_early(base, size); +} + +static inline void __init memblock_free_late(phys_addr_t base, phys_addr_t size) +{ + __memblock_free_late(base, size); +} + /* * Set the allocation direction to bottom-up or top-down. */ @@ -323,10 +445,6 @@ static inline bool memblock_bottom_up(void) return memblock.bottom_up; } -/* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ -#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) -#define MEMBLOCK_ALLOC_ACCESSIBLE 0 - phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, enum memblock_flags flags); @@ -432,6 +550,31 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo i < memblock_type->cnt; \ i++, rgn = &memblock_type->regions[i]) +extern void *alloc_large_system_hash(const char *tablename, + unsigned long bucketsize, + unsigned long numentries, + int scale, + int flags, + unsigned int *_hash_shift, + unsigned int *_hash_mask, + unsigned long low_limit, + unsigned long high_limit); + +#define HASH_EARLY 0x00000001 /* Allocating during early boot? */ +#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min + * shift passed via *_hash_shift */ +#define HASH_ZERO 0x00000004 /* Zero allocated hash table */ + +/* Only NUMA needs hash distribution. 64bit NUMA architectures have + * sufficient vmalloc space. + */ +#ifdef CONFIG_NUMA +#define HASHDIST_DEFAULT IS_ENABLED(CONFIG_64BIT) +extern int hashdist; /* Distribute hashes across NUMA nodes? */ +#else +#define hashdist (0) +#endif + #ifdef CONFIG_MEMTEST extern void early_memtest(phys_addr_t start, phys_addr_t end); #else diff --git a/init/main.c b/init/main.c index 8cef69c61389..51b8e7b8ae5b 100644 --- a/init/main.c +++ b/init/main.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index f14c376937e5..22a12ab5a5e9 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -4,7 +4,7 @@ * * DMA operations that map physical memory directly without using an IOMMU. */ -#include /* for max_pfn */ +#include /* for max_pfn */ #include #include #include diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 801da67e957b..5731daa09a32 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -40,7 +40,7 @@ #include #include -#include +#include #include #define CREATE_TRACE_POINTS diff --git a/kernel/futex.c b/kernel/futex.c index 3e2de8fc1891..f423f9b6577e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -65,7 +65,7 @@ #include #include #include -#include +#include #include #include diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 0130e488ebfe..8f36c27c1794 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -4,7 +4,7 @@ #endif #include -#include +#include #include /* diff --git a/kernel/pid.c b/kernel/pid.c index cdf63e53a014..b2f6c506035d 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 34116a6097be..3c9e365438ad 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 429e4a3833ca..1b2a029360b7 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/profile.c b/kernel/profile.c index 9aa2a4445b0d..9c08a2c7cb1d 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include diff --git a/lib/cpumask.c b/lib/cpumask.c index 1405cb22e6bc..75b5e7672c4c 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include /** * cpumask_next - get the next cpu in a cpumask diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e35d99844612..c007fb5fb8d5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 785a9707786b..c7550eb65922 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -10,11 +10,10 @@ * */ -#include +#include #include #include #include -#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 4f7e4b5a2f08..877de4fa0720 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -92,7 +92,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/memblock.c b/mm/memblock.c index 2ed73245b5da..c655342569f8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 7e6509a53d79..41e326472ef9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5f3291601945..ef289fadec0e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/page_ext.c b/mm/page_ext.c index 5323c2ade686..ae44f7adbe07 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include +#include #include #include #include diff --git a/mm/page_idle.c b/mm/page_idle.c index 6302bc62c27d..b9e4b42b33ab 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include +#include #include #include #include diff --git a/mm/page_owner.c b/mm/page_owner.c index d80adfe702d3..87bc0dfdb52b 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/percpu.c b/mm/percpu.c index 3050c1d37d37..61cdbb3b3736 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -65,7 +65,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 7408cabed61a..7fec05796796 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -20,7 +20,6 @@ */ #include #include -#include #include #include #include diff --git a/mm/sparse.c b/mm/sparse.c index b139fbc61d10..ab2ac45e0440 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index f5c9ef2586de..411dd7a90046 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1834818ed07b..9e6bc4d6daa7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -262,7 +262,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ca3ed931f2a9..1976fddb9e00 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -81,7 +81,7 @@ #include #include -#include +#include #include #include #include diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e948db29ab53..9b277bd36d1a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c index 2ad33ce1ea17..eca8d84d99bf 100644 --- a/net/xfrm/xfrm_hash.c +++ b/net/xfrm/xfrm_hash.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 7e1c4e27928e5f87b9b1eaf06dc31773b2f1e7f1 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:09:57 -0700 Subject: memblock: stop using implicit alignment to SMP_CACHE_BYTES When a memblock allocation APIs are called with align = 0, the alignment is implicitly set to SMP_CACHE_BYTES. Implicit alignment is done deep in the memblock allocator and it can come as a surprise. Not that such an alignment would be wrong even when used incorrectly but it is better to be explicit for the sake of clarity and the prinicple of the least surprise. Replace all such uses of memblock APIs with the 'align' parameter explicitly set to SMP_CACHE_BYTES and stop implicit alignment assignment in the memblock internal allocation functions. For the case when memblock APIs are used via helper functions, e.g. like iommu_arena_new_node() in Alpha, the helper functions were detected with Coccinelle's help and then manually examined and updated where appropriate. The direct memblock APIs users were updated using the semantic patch below: @@ expression size, min_addr, max_addr, nid; @@ ( | - memblock_alloc_try_nid_raw(size, 0, min_addr, max_addr, nid) + memblock_alloc_try_nid_raw(size, SMP_CACHE_BYTES, min_addr, max_addr, nid) | - memblock_alloc_try_nid_nopanic(size, 0, min_addr, max_addr, nid) + memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES, min_addr, max_addr, nid) | - memblock_alloc_try_nid(size, 0, min_addr, max_addr, nid) + memblock_alloc_try_nid(size, SMP_CACHE_BYTES, min_addr, max_addr, nid) | - memblock_alloc(size, 0) + memblock_alloc(size, SMP_CACHE_BYTES) | - memblock_alloc_raw(size, 0) + memblock_alloc_raw(size, SMP_CACHE_BYTES) | - memblock_alloc_from(size, 0, min_addr) + memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr) | - memblock_alloc_nopanic(size, 0) + memblock_alloc_nopanic(size, SMP_CACHE_BYTES) | - memblock_alloc_low(size, 0) + memblock_alloc_low(size, SMP_CACHE_BYTES) | - memblock_alloc_low_nopanic(size, 0) + memblock_alloc_low_nopanic(size, SMP_CACHE_BYTES) | - memblock_alloc_from_nopanic(size, 0, min_addr) + memblock_alloc_from_nopanic(size, SMP_CACHE_BYTES, min_addr) | - memblock_alloc_node(size, 0, nid) + memblock_alloc_node(size, SMP_CACHE_BYTES, nid) ) [mhocko@suse.com: changelog update] [akpm@linux-foundation.org: coding-style fixes] [rppt@linux.ibm.com: fix missed uses of implicit alignment] Link: http://lkml.kernel.org/r/20181016133656.GA10925@rapoport-lnx Link: http://lkml.kernel.org/r/1538687224-17535-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Suggested-by: Michal Hocko Acked-by: Paul Burton [MIPS] Acked-by: Michael Ellerman [powerpc] Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: Geert Uytterhoeven Cc: Guan Xuetao Cc: Ingo Molnar Cc: Matt Turner Cc: Michal Simek Cc: Richard Weinberger Cc: Russell King Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/core_apecs.c | 3 ++- arch/alpha/kernel/core_lca.c | 3 ++- arch/alpha/kernel/core_marvel.c | 4 ++-- arch/alpha/kernel/core_mcpcia.c | 6 +++-- arch/alpha/kernel/core_t2.c | 2 +- arch/alpha/kernel/core_titan.c | 6 +++-- arch/alpha/kernel/core_tsunami.c | 6 +++-- arch/alpha/kernel/core_wildfire.c | 6 +++-- arch/alpha/kernel/pci-noop.c | 4 ++-- arch/alpha/kernel/pci.c | 4 ++-- arch/alpha/kernel/pci_iommu.c | 4 ++-- arch/arm/kernel/setup.c | 4 ++-- arch/arm/mach-omap2/omap_hwmod.c | 8 ++++--- arch/arm64/kernel/setup.c | 2 +- arch/ia64/kernel/mca.c | 4 ++-- arch/ia64/mm/tlb.c | 6 +++-- arch/ia64/sn/kernel/io_common.c | 4 +++- arch/ia64/sn/kernel/setup.c | 5 ++-- arch/m68k/sun3/sun3dvma.c | 2 +- arch/microblaze/mm/init.c | 2 +- arch/mips/kernel/setup.c | 2 +- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/kernel/pci_32.c | 3 ++- arch/powerpc/lib/alloc.c | 2 +- arch/powerpc/mm/mmu_context_nohash.c | 7 +++--- arch/powerpc/platforms/powermac/nvram.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 6 ++--- arch/powerpc/sysdev/msi_bitmap.c | 2 +- arch/um/drivers/net_kern.c | 2 +- arch/um/drivers/vector_kern.c | 2 +- arch/um/kernel/initrd.c | 2 +- arch/unicore32/kernel/setup.c | 2 +- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/apic/io_apic.c | 2 +- arch/x86/kernel/e820.c | 3 ++- arch/x86/platform/olpc/olpc_dt.c | 2 +- arch/xtensa/platforms/iss/network.c | 2 +- drivers/clk/ti/clk.c | 2 +- drivers/firmware/efi/memmap.c | 2 +- drivers/firmware/memmap.c | 3 ++- drivers/macintosh/smu.c | 2 +- drivers/of/of_reserved_mem.c | 1 + include/linux/memblock.h | 3 ++- init/main.c | 13 +++++++---- kernel/power/snapshot.c | 3 ++- lib/cpumask.c | 2 +- mm/memblock.c | 8 ------- mm/page_alloc.c | 6 +++-- mm/percpu.c | 38 ++++++++++++++++--------------- mm/sparse.c | 3 ++- 50 files changed, 120 insertions(+), 96 deletions(-) (limited to 'kernel') diff --git a/arch/alpha/kernel/core_apecs.c b/arch/alpha/kernel/core_apecs.c index 1bf3eef34c22..6df765ff2b10 100644 --- a/arch/alpha/kernel/core_apecs.c +++ b/arch/alpha/kernel/core_apecs.c @@ -346,7 +346,8 @@ apecs_init_arch(void) * Window 1 is direct access 1GB at 1GB * Window 2 is scatter-gather 8MB at 8MB (for isa) */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); hose->sg_pci = NULL; __direct_map_base = 0x40000000; __direct_map_size = 0x40000000; diff --git a/arch/alpha/kernel/core_lca.c b/arch/alpha/kernel/core_lca.c index 81c0c43635b0..57e0750419f2 100644 --- a/arch/alpha/kernel/core_lca.c +++ b/arch/alpha/kernel/core_lca.c @@ -275,7 +275,8 @@ lca_init_arch(void) * Note that we do not try to save any of the DMA window CSRs * before setting them, since we cannot read those CSRs on LCA. */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); hose->sg_pci = NULL; __direct_map_base = 0x40000000; __direct_map_size = 0x40000000; diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 8a568c4d8e81..c1d0c18c71ca 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -82,7 +82,7 @@ mk_resource_name(int pe, int port, char *str) char *name; sprintf(tmp, "PCI %s PE %d PORT %d", str, pe, port); - name = memblock_alloc(strlen(tmp) + 1, 0); + name = memblock_alloc(strlen(tmp) + 1, SMP_CACHE_BYTES); strcpy(name, tmp); return name; @@ -117,7 +117,7 @@ alloc_io7(unsigned int pe) return NULL; } - io7 = memblock_alloc(sizeof(*io7), 0); + io7 = memblock_alloc(sizeof(*io7), SMP_CACHE_BYTES); io7->pe = pe; raw_spin_lock_init(&io7->irq_lock); diff --git a/arch/alpha/kernel/core_mcpcia.c b/arch/alpha/kernel/core_mcpcia.c index b1549db54260..74b1d018124c 100644 --- a/arch/alpha/kernel/core_mcpcia.c +++ b/arch/alpha/kernel/core_mcpcia.c @@ -364,9 +364,11 @@ mcpcia_startup_hose(struct pci_controller *hose) * Window 1 is scatter-gather (up to) 1GB at 1GB (for pci) * Window 2 is direct access 2GB at 2GB */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); hose->sg_pci = iommu_arena_new(hose, 0x40000000, - size_for_memory(0x40000000), 0); + size_for_memory(0x40000000), + SMP_CACHE_BYTES); __direct_map_base = 0x80000000; __direct_map_size = 0x80000000; diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c index 2c00b61ca379..98d5b6ff8a76 100644 --- a/arch/alpha/kernel/core_t2.c +++ b/arch/alpha/kernel/core_t2.c @@ -351,7 +351,7 @@ t2_sg_map_window2(struct pci_controller *hose, /* Note we can only do 1 SG window, as the other is for direct, so do an ISA SG area, especially for the floppy. */ - hose->sg_isa = iommu_arena_new(hose, base, length, 0); + hose->sg_isa = iommu_arena_new(hose, base, length, SMP_CACHE_BYTES); hose->sg_pci = NULL; temp = (base & 0xfff00000UL) | ((base + length - 1) >> 20); diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 97551597581b..2a2820fb1be6 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -316,10 +316,12 @@ titan_init_one_pachip_port(titan_pachip_port *port, int index) * Window 1 is direct access 1GB at 2GB * Window 2 is scatter-gather 1GB at 3GB */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); hose->sg_isa->align_entry = 8; /* 64KB for ISA */ - hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x40000000, 0); + hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x40000000, + SMP_CACHE_BYTES); hose->sg_pci->align_entry = 4; /* Titan caches 4 PTEs at a time */ port->wsba[0].csr = hose->sg_isa->dma_base | 3; diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c index f334b8928d72..fc1ab73f23de 100644 --- a/arch/alpha/kernel/core_tsunami.c +++ b/arch/alpha/kernel/core_tsunami.c @@ -319,12 +319,14 @@ tsunami_init_one_pchip(tsunami_pchip *pchip, int index) * NOTE: we need the align_entry settings for Acer devices on ES40, * specifically floppy and IDE when memory is larger than 2GB. */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); /* Initially set for 4 PTEs, but will be overridden to 64K for ISA. */ hose->sg_isa->align_entry = 4; hose->sg_pci = iommu_arena_new(hose, 0x40000000, - size_for_memory(0x40000000), 0); + size_for_memory(0x40000000), + SMP_CACHE_BYTES); hose->sg_pci->align_entry = 4; /* Tsunami caches 4 PTEs at a time */ __direct_map_base = 0x80000000; diff --git a/arch/alpha/kernel/core_wildfire.c b/arch/alpha/kernel/core_wildfire.c index cad36fc6ed7d..353c03d15442 100644 --- a/arch/alpha/kernel/core_wildfire.c +++ b/arch/alpha/kernel/core_wildfire.c @@ -111,8 +111,10 @@ wildfire_init_hose(int qbbno, int hoseno) * ??? We ought to scale window 3 memory. * */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); - hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x08000000, 0); + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); + hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x08000000, + SMP_CACHE_BYTES); pci = WILDFIRE_pci(qbbno, hoseno); diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index a9378ee0c2f1..091cff3c68fd 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -33,7 +33,7 @@ alloc_pci_controller(void) { struct pci_controller *hose; - hose = memblock_alloc(sizeof(*hose), 0); + hose = memblock_alloc(sizeof(*hose), SMP_CACHE_BYTES); *hose_tail = hose; hose_tail = &hose->next; @@ -44,7 +44,7 @@ alloc_pci_controller(void) struct resource * __init alloc_resource(void) { - return memblock_alloc(sizeof(struct resource), 0); + return memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); } SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, bus, diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 13937e72d875..97098127df83 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -392,7 +392,7 @@ alloc_pci_controller(void) { struct pci_controller *hose; - hose = memblock_alloc(sizeof(*hose), 0); + hose = memblock_alloc(sizeof(*hose), SMP_CACHE_BYTES); *hose_tail = hose; hose_tail = &hose->next; @@ -403,7 +403,7 @@ alloc_pci_controller(void) struct resource * __init alloc_resource(void) { - return memblock_alloc(sizeof(struct resource), 0); + return memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); } diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 82cf950bda2a..46e08e0d9181 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -79,7 +79,7 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base, printk("%s: couldn't allocate arena from node %d\n" " falling back to system-wide allocation\n", __func__, nid); - arena = memblock_alloc(sizeof(*arena), 0); + arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES); } arena->ptes = memblock_alloc_node(sizeof(*arena), align, nid); @@ -92,7 +92,7 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base, #else /* CONFIG_DISCONTIGMEM */ - arena = memblock_alloc(sizeof(*arena), 0); + arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES); arena->ptes = memblock_alloc_from(mem_size, align, 0); #endif /* CONFIG_DISCONTIGMEM */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 840a4adc69fc..ac7e08886863 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -856,7 +856,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) */ boot_alias_start = phys_to_idmap(start); if (arm_has_idmap_alias() && boot_alias_start != IDMAP_INVALID_ADDR) { - res = memblock_alloc(sizeof(*res), 0); + res = memblock_alloc(sizeof(*res), SMP_CACHE_BYTES); res->name = "System RAM (boot alias)"; res->start = boot_alias_start; res->end = phys_to_idmap(end); @@ -864,7 +864,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) request_resource(&iomem_resource, res); } - res = memblock_alloc(sizeof(*res), 0); + res = memblock_alloc(sizeof(*res), SMP_CACHE_BYTES); res->name = "System RAM"; res->start = start; res->end = end; diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index cd5732ab0cdf..083dcd9942ce 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -726,7 +726,7 @@ static int __init _setup_clkctrl_provider(struct device_node *np) u64 size; int i; - provider = memblock_alloc(sizeof(*provider), 0); + provider = memblock_alloc(sizeof(*provider), SMP_CACHE_BYTES); if (!provider) return -ENOMEM; @@ -736,12 +736,14 @@ static int __init _setup_clkctrl_provider(struct device_node *np) of_property_count_elems_of_size(np, "reg", sizeof(u32)) / 2; provider->addr = - memblock_alloc(sizeof(void *) * provider->num_addrs, 0); + memblock_alloc(sizeof(void *) * provider->num_addrs, + SMP_CACHE_BYTES); if (!provider->addr) return -ENOMEM; provider->size = - memblock_alloc(sizeof(u32) * provider->num_addrs, 0); + memblock_alloc(sizeof(u32) * provider->num_addrs, + SMP_CACHE_BYTES); if (!provider->size) return -ENOMEM; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 7ce7306f1d75..953e316521fc 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -218,7 +218,7 @@ static void __init request_standard_resources(void) num_standard_resources = memblock.memory.cnt; standard_resources = memblock_alloc_low(num_standard_resources * sizeof(*standard_resources), - 0); + SMP_CACHE_BYTES); for_each_memblock(memory, region) { res = &standard_resources[i++]; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 9a6603f8e409..91bd1e129379 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -361,9 +361,9 @@ static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; #define IA64_LOG_ALLOCATE(it, size) \ {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \ - (ia64_err_rec_t *)memblock_alloc(size, 0); \ + (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES); \ ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \ - (ia64_err_rec_t *)memblock_alloc(size, 0);} + (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES);} #define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock) #define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s) #define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s) diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index ab545daff7c3..9340bcb4f29c 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -59,8 +59,10 @@ struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; void __init mmu_context_init (void) { - ia64_ctx.bitmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, 0); - ia64_ctx.flushmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, 0); + ia64_ctx.bitmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, + SMP_CACHE_BYTES); + ia64_ctx.flushmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, + SMP_CACHE_BYTES); } /* diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c index 98f55220c67d..8df13d0d96fa 100644 --- a/arch/ia64/sn/kernel/io_common.c +++ b/arch/ia64/sn/kernel/io_common.c @@ -391,7 +391,9 @@ void __init hubdev_init_node(nodepda_t * npda, cnodeid_t node) if (node >= num_online_nodes()) /* Headless/memless IO nodes */ node = 0; - hubdev_info = (struct hubdev_info *)memblock_alloc_node(size, 0, node); + hubdev_info = (struct hubdev_info *)memblock_alloc_node(size, + SMP_CACHE_BYTES, + node); npda->pdinfo = (void *)hubdev_info; } diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 71ad6b0ccab4..a6d40a2c5bff 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -511,7 +511,8 @@ static void __init sn_init_pdas(char **cmdline_p) */ for_each_online_node(cnode) { nodepdaindr[cnode] = - memblock_alloc_node(sizeof(nodepda_t), 0, cnode); + memblock_alloc_node(sizeof(nodepda_t), SMP_CACHE_BYTES, + cnode); memset(nodepdaindr[cnode]->phys_cpuid, -1, sizeof(nodepdaindr[cnode]->phys_cpuid)); spin_lock_init(&nodepdaindr[cnode]->ptc_lock); @@ -522,7 +523,7 @@ static void __init sn_init_pdas(char **cmdline_p) */ for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) nodepdaindr[cnode] = - memblock_alloc_node(sizeof(nodepda_t), 0, 0); + memblock_alloc_node(sizeof(nodepda_t), SMP_CACHE_BYTES, 0); /* * Now copy the array of nodepda pointers to each nodepda. diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c index 8be8b750c629..4d64711d3d47 100644 --- a/arch/m68k/sun3/sun3dvma.c +++ b/arch/m68k/sun3/sun3dvma.c @@ -268,7 +268,7 @@ void __init dvma_init(void) list_add(&(hole->list), &hole_list); iommu_use = memblock_alloc(IOMMU_TOTAL_ENTRIES * sizeof(unsigned long), - 0); + SMP_CACHE_BYTES); dvma_unmap_iommu(DVMA_START, DVMA_SIZE); diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 8c14988f52f2..b17fd8aafd64 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -376,7 +376,7 @@ void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) if (mem_init_done) p = kzalloc(size, mask); else { - p = memblock_alloc(size, 0); + p = memblock_alloc(size, SMP_CACHE_BYTES); if (p) memset(p, 0, size); } diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 41c1683761bb..ea09ed6a80a9 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -916,7 +916,7 @@ static void __init resource_init(void) if (end >= HIGHMEM_START) end = HIGHMEM_START - 1; - res = memblock_alloc(sizeof(struct resource), 0); + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); res->start = start; res->end = end; diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index f331a0054b3a..913bfca09c4f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -198,7 +198,7 @@ void __init allocate_paca_ptrs(void) paca_nr_cpu_ids = nr_cpu_ids; paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids; - paca_ptrs = __va(memblock_phys_alloc(paca_ptrs_size, 0)); + paca_ptrs = __va(memblock_phys_alloc(paca_ptrs_size, SMP_CACHE_BYTES)); memset(paca_ptrs, 0x88, paca_ptrs_size); } diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 274bd1442dd9..d3f04f2d8249 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -203,7 +203,8 @@ pci_create_OF_bus_map(void) struct property* of_prop; struct device_node *dn; - of_prop = memblock_alloc(sizeof(struct property) + 256, 0); + of_prop = memblock_alloc(sizeof(struct property) + 256, + SMP_CACHE_BYTES); dn = of_find_node_by_path("/"); if (dn) { memset(of_prop, -1, sizeof(struct property) + 256); diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index 5b61704447c1..dedf88a76f58 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -14,7 +14,7 @@ void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) if (slab_is_available()) p = kzalloc(size, mask); else { - p = memblock_alloc(size, 0); + p = memblock_alloc(size, SMP_CACHE_BYTES); } return p; } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 67b9d7b669a1..2faca46ad720 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -461,10 +461,11 @@ void __init mmu_context_init(void) /* * Allocate the maps used by context management */ - context_map = memblock_alloc(CTX_MAP_SIZE, 0); - context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 0); + context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); + context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1), + SMP_CACHE_BYTES); #ifdef CONFIG_SMP - stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, 0); + stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, "powerpc/mmu/ctx:prepare", diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index f3391be7c762..ae54d7fe68f3 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -513,7 +513,7 @@ static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr) printk(KERN_ERR "nvram: no address\n"); return -EINVAL; } - nvram_image = memblock_alloc(NVRAM_SIZE, 0); + nvram_image = memblock_alloc(NVRAM_SIZE, SMP_CACHE_BYTES); nvram_data = ioremap(addr, NVRAM_SIZE*2); nvram_naddrs = 1; /* Make sure we get the correct case */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index aba81cbf0b36..dd807446801e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3769,7 +3769,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = memblock_alloc(sizeof(*phb), 0); + phb = memblock_alloc(sizeof(*phb), SMP_CACHE_BYTES); /* Allocate PCI controller */ phb->hose = hose = pcibios_alloc_controller(np); @@ -3815,7 +3815,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, else phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; - phb->diag_data = memblock_alloc(phb->diag_data_size, 0); + phb->diag_data = memblock_alloc(phb->diag_data_size, SMP_CACHE_BYTES); /* Parse 32-bit and IO ranges (if any) */ pci_process_bridge_OF_ranges(hose, np, !hose->global_number); @@ -3874,7 +3874,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); - aux = memblock_alloc(size, 0); + aux = memblock_alloc(size, SMP_CACHE_BYTES); phb->ioda.pe_alloc = aux; phb->ioda.m64_segmap = aux + m64map_off; phb->ioda.m32_segmap = aux + m32map_off; diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 2444feda831f..d45450f6666a 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -128,7 +128,7 @@ int __ref msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count, if (bmp->bitmap_from_slab) bmp->bitmap = kzalloc(size, GFP_KERNEL); else { - bmp->bitmap = memblock_alloc(size, 0); + bmp->bitmap = memblock_alloc(size, SMP_CACHE_BYTES); /* the bitmap won't be freed from memblock allocator */ kmemleak_not_leak(bmp->bitmap); } diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 673816880cce..624cb47cc9cd 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -650,7 +650,7 @@ static int __init eth_setup(char *str) return 1; } - new = memblock_alloc(sizeof(*new), 0); + new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES); INIT_LIST_HEAD(&new->list); new->index = n; diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 2b4dded11a7a..10d8d20eb9ec 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1580,7 +1580,7 @@ static int __init vector_setup(char *str) str, error); return 1; } - new = memblock_alloc(sizeof(*new), 0); + new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES); INIT_LIST_HEAD(&new->list); new->unit = n; new->arguments = str; diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c index 3678f5b05e42..ce169ea87e61 100644 --- a/arch/um/kernel/initrd.c +++ b/arch/um/kernel/initrd.c @@ -36,7 +36,7 @@ int __init read_initrd(void) return 0; } - area = memblock_alloc(size, 0); + area = memblock_alloc(size, SMP_CACHE_BYTES); if (load_initrd(initrd, area, size) == -1) return 0; diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c index b2c38b32ea57..4b0cb68c355a 100644 --- a/arch/unicore32/kernel/setup.c +++ b/arch/unicore32/kernel/setup.c @@ -206,7 +206,7 @@ request_standard_resources(struct meminfo *mi) if (mi->bank[i].size == 0) continue; - res = memblock_alloc_low(sizeof(*res), 0); + res = memblock_alloc_low(sizeof(*res), SMP_CACHE_BYTES); res->name = "System RAM"; res->start = mi->bank[i].start; res->end = mi->bank[i].start + mi->bank[i].size - 1; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7f5d212551d4..92c76bf97ad8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -934,7 +934,7 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) */ #define HPET_RESOURCE_NAME_SIZE 9 hpet_res = memblock_alloc(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE, - 0); + SMP_CACHE_BYTES); hpet_res->name = (void *)&hpet_res[1]; hpet_res->flags = IORESOURCE_MEM; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5fbc57e4b0b9..2953bbf05c08 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2578,7 +2578,7 @@ static struct resource * __init ioapic_setup_resources(void) n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); n *= nr_ioapics; - mem = memblock_alloc(n, 0); + mem = memblock_alloc(n, SMP_CACHE_BYTES); res = (void *)mem; mem += sizeof(struct resource) * nr_ioapics; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 68ff62bffbab..50895c2f937d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1093,7 +1093,8 @@ void __init e820__reserve_resources(void) struct resource *res; u64 end; - res = memblock_alloc(sizeof(*res) * e820_table->nr_entries, 0); + res = memblock_alloc(sizeof(*res) * e820_table->nr_entries, + SMP_CACHE_BYTES); e820_res = res; for (i = 0; i < e820_table->nr_entries; i++) { diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index 115c8e4173bb..24d2175a9480 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -141,7 +141,7 @@ void * __init prom_early_alloc(unsigned long size) * fast enough on the platforms we care about while minimizing * wasted bootmem) and hand off chunks of it to callers. */ - res = memblock_alloc(chunk_size, 0); + res = memblock_alloc(chunk_size, SMP_CACHE_BYTES); BUG_ON(!res); prom_early_allocated += chunk_size; memset(res, 0, chunk_size); diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 190846dddc67..d052712373b6 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -646,7 +646,7 @@ static int __init iss_net_setup(char *str) return 1; } - new = memblock_alloc(sizeof(*new), 0); + new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES); if (new == NULL) { pr_err("Alloc_bootmem failed\n"); return 1; diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index 5b2867a33b98..e205af814582 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -342,7 +342,7 @@ void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem) { struct clk_iomap *io; - io = memblock_alloc(sizeof(*io), 0); + io = memblock_alloc(sizeof(*io), SMP_CACHE_BYTES); io->mem = mem; diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index ef618bceb79a..fa2904fb841f 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -15,7 +15,7 @@ static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size) { - return memblock_phys_alloc(size, 0); + return memblock_phys_alloc(size, SMP_CACHE_BYTES); } static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size) diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 2a23453f005a..d168c87c7d30 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -333,7 +333,8 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; - entry = memblock_alloc(sizeof(struct firmware_map_entry), 0); + entry = memblock_alloc(sizeof(struct firmware_map_entry), + SMP_CACHE_BYTES); if (WARN_ON(!entry)) return -ENOMEM; diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 880a81c82b7a..0a0b8e1f4236 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -492,7 +492,7 @@ int __init smu_init (void) goto fail_np; } - smu = memblock_alloc(sizeof(struct smu_device), 0); + smu = memblock_alloc(sizeof(struct smu_device), SMP_CACHE_BYTES); spin_lock_init(&smu->lock); INIT_LIST_HEAD(&smu->cmd_list); diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index d6255c276a41..1977ee0adcb1 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -36,6 +36,7 @@ int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, * panic()s on allocation failure. */ end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end; + align = !align ? SMP_CACHE_BYTES : align; base = __memblock_alloc_base(size, align, end); if (!base) return -ENOMEM; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1b4d85879cbe..aee299a6aa76 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -406,7 +406,8 @@ static inline void * __init memblock_alloc_node(phys_addr_t size, static inline void * __init memblock_alloc_node_nopanic(phys_addr_t size, int nid) { - return memblock_alloc_try_nid_nopanic(size, 0, MEMBLOCK_LOW_LIMIT, + return memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES, + MEMBLOCK_LOW_LIMIT, MEMBLOCK_ALLOC_ACCESSIBLE, nid); } diff --git a/init/main.c b/init/main.c index 51b8e7b8ae5b..ee147103ba1b 100644 --- a/init/main.c +++ b/init/main.c @@ -375,10 +375,11 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { } static void __init setup_command_line(char *command_line) { saved_command_line = - memblock_alloc(strlen(boot_command_line) + 1, 0); + memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES); initcall_command_line = - memblock_alloc(strlen(boot_command_line) + 1, 0); - static_command_line = memblock_alloc(strlen(command_line) + 1, 0); + memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES); + static_command_line = memblock_alloc(strlen(command_line) + 1, + SMP_CACHE_BYTES); strcpy(saved_command_line, boot_command_line); strcpy(static_command_line, command_line); } @@ -773,8 +774,10 @@ static int __init initcall_blacklist(char *str) str_entry = strsep(&str, ","); if (str_entry) { pr_debug("blacklisting initcall %s\n", str_entry); - entry = memblock_alloc(sizeof(*entry), 0); - entry->buf = memblock_alloc(strlen(str_entry) + 1, 0); + entry = memblock_alloc(sizeof(*entry), + SMP_CACHE_BYTES); + entry->buf = memblock_alloc(strlen(str_entry) + 1, + SMP_CACHE_BYTES); strcpy(entry->buf, str_entry); list_add(&entry->next, &blacklisted_initcalls); } diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 3c9e365438ad..b0308a2c6000 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -963,7 +963,8 @@ void __init __register_nosave_region(unsigned long start_pfn, BUG_ON(!region); } else { /* This allocation cannot fail */ - region = memblock_alloc(sizeof(struct nosave_region), 0); + region = memblock_alloc(sizeof(struct nosave_region), + SMP_CACHE_BYTES); } region->start_pfn = start_pfn; region->end_pfn = end_pfn; diff --git a/lib/cpumask.c b/lib/cpumask.c index 75b5e7672c4c..8d666ab84b5c 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -163,7 +163,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var); */ void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { - *mask = memblock_alloc(cpumask_size(), 0); + *mask = memblock_alloc(cpumask_size(), SMP_CACHE_BYTES); } /** diff --git a/mm/memblock.c b/mm/memblock.c index c655342569f8..839531133816 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1247,9 +1247,6 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, { phys_addr_t found; - if (!align) - align = SMP_CACHE_BYTES; - found = memblock_find_in_range_node(size, align, start, end, nid, flags); if (found && !memblock_reserve(found, size)) { @@ -1343,8 +1340,6 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali * The allocation is performed from memory region limited by * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE. * - * The memory block is aligned on %SMP_CACHE_BYTES if @align == 0. - * * The phys address of allocated boot memory block is converted to virtual and * allocated memory is reset to 0. * @@ -1374,9 +1369,6 @@ static void * __init memblock_alloc_internal( if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, nid); - if (!align) - align = SMP_CACHE_BYTES; - if (max_addr > memblock.current_limit) max_addr = memblock.current_limit; again: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ef289fadec0e..a919ba5cb3c8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7710,9 +7710,11 @@ void *__init alloc_large_system_hash(const char *tablename, size = bucketsize << log2qty; if (flags & HASH_EARLY) { if (flags & HASH_ZERO) - table = memblock_alloc_nopanic(size, 0); + table = memblock_alloc_nopanic(size, + SMP_CACHE_BYTES); else - table = memblock_alloc_raw(size, 0); + table = memblock_alloc_raw(size, + SMP_CACHE_BYTES); } else if (hashdist) { table = __vmalloc(size, gfp_flags, PAGE_KERNEL); } else { diff --git a/mm/percpu.c b/mm/percpu.c index 61cdbb3b3736..a6b74c6fe0be 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1102,8 +1102,8 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, /* allocate chunk */ chunk = memblock_alloc(sizeof(struct pcpu_chunk) + - BITS_TO_LONGS(region_size >> PAGE_SHIFT), - 0); + BITS_TO_LONGS(region_size >> PAGE_SHIFT), + SMP_CACHE_BYTES); INIT_LIST_HEAD(&chunk->list); @@ -1114,12 +1114,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, chunk->nr_pages = region_size >> PAGE_SHIFT; region_bits = pcpu_chunk_map_bits(chunk); - chunk->alloc_map = memblock_alloc(BITS_TO_LONGS(region_bits) * - sizeof(chunk->alloc_map[0]), 0); - chunk->bound_map = memblock_alloc(BITS_TO_LONGS(region_bits + 1) * - sizeof(chunk->bound_map[0]), 0); - chunk->md_blocks = memblock_alloc(pcpu_chunk_nr_blocks(chunk) * - sizeof(chunk->md_blocks[0]), 0); + chunk->alloc_map = memblock_alloc(BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]), + SMP_CACHE_BYTES); + chunk->bound_map = memblock_alloc(BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]), + SMP_CACHE_BYTES); + chunk->md_blocks = memblock_alloc(pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]), + SMP_CACHE_BYTES); pcpu_init_md_blocks(chunk); /* manage populated page bitmap */ @@ -2075,12 +2075,14 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0); /* process group information and build config tables accordingly */ - group_offsets = memblock_alloc(ai->nr_groups * - sizeof(group_offsets[0]), 0); - group_sizes = memblock_alloc(ai->nr_groups * - sizeof(group_sizes[0]), 0); - unit_map = memblock_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0); - unit_off = memblock_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0); + group_offsets = memblock_alloc(ai->nr_groups * sizeof(group_offsets[0]), + SMP_CACHE_BYTES); + group_sizes = memblock_alloc(ai->nr_groups * sizeof(group_sizes[0]), + SMP_CACHE_BYTES); + unit_map = memblock_alloc(nr_cpu_ids * sizeof(unit_map[0]), + SMP_CACHE_BYTES); + unit_off = memblock_alloc(nr_cpu_ids * sizeof(unit_off[0]), + SMP_CACHE_BYTES); for (cpu = 0; cpu < nr_cpu_ids; cpu++) unit_map[cpu] = UINT_MAX; @@ -2144,8 +2146,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, * empty chunks. */ pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2; - pcpu_slot = memblock_alloc( - pcpu_nr_slots * sizeof(pcpu_slot[0]), 0); + pcpu_slot = memblock_alloc(pcpu_nr_slots * sizeof(pcpu_slot[0]), + SMP_CACHE_BYTES); for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_slot[i]); @@ -2458,7 +2460,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *)); - areas = memblock_alloc_nopanic(areas_size, 0); + areas = memblock_alloc_nopanic(areas_size, SMP_CACHE_BYTES); if (!areas) { rc = -ENOMEM; goto out_free; @@ -2599,7 +2601,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, /* unaligned allocations can't be freed, round up to page size */ pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * sizeof(pages[0])); - pages = memblock_alloc(pages_size, 0); + pages = memblock_alloc(pages_size, SMP_CACHE_BYTES); /* allocate pages */ j = 0; diff --git a/mm/sparse.c b/mm/sparse.c index ab2ac45e0440..33307fc05c4d 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -68,7 +68,8 @@ static noinline struct mem_section __ref *sparse_index_alloc(int nid) if (slab_is_available()) section = kzalloc_node(array_size, GFP_KERNEL, nid); else - section = memblock_alloc_node(array_size, 0, nid); + section = memblock_alloc_node(array_size, SMP_CACHE_BYTES, + nid); return section; } -- cgit v1.2.3