diff options
Diffstat (limited to 'lib')
37 files changed, 1678 insertions, 439 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index ce2abffb9ed8..5c2da561c516 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -92,6 +92,7 @@ config ARCH_USE_SYM_ANNOTATIONS config INDIRECT_PIO bool "Access I/O in non-MMIO mode" depends on ARM64 + depends on HAS_IOPORT help On some platforms where no separate I/O space exists, there are I/O hosts which can not be accessed in MMIO mode. Using the logical PIO @@ -509,6 +510,9 @@ config HAS_IOMEM depends on !NO_IOMEM default y +config HAS_IOPORT + bool + config HAS_IOPORT_MAP bool depends on HAS_IOMEM && !NO_IOPORT_MAP diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c8b379e2e9ad..d49d7c5e12ec 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -791,6 +791,16 @@ config DEBUG_VM If unsure, say N. +config DEBUG_VM_SHOOT_LAZIES + bool "Debug MMU_LAZY_TLB_SHOOTDOWN implementation" + depends on DEBUG_VM + depends on MMU_LAZY_TLB_SHOOTDOWN + help + Enable additional IPIs that ensure lazy tlb mm references are removed + before the mm is freed. + + If unsure, say N. + config DEBUG_VM_MAPLE_TREE bool "Debug VM maple trees" depends on DEBUG_VM @@ -1143,7 +1153,7 @@ menu "Scheduler Debugging" config SCHED_DEBUG bool "Collect scheduler debugging info" - depends on DEBUG_KERNEL && PROC_FS + depends on DEBUG_KERNEL && DEBUG_FS default y help If you say Y here, the /sys/kernel/debug/sched file will be provided @@ -1392,7 +1402,7 @@ config LOCKDEP_STACK_TRACE_HASH_BITS range 10 30 default 14 help - Try increasing this value if you need large MAX_STACK_TRACE_ENTRIES. + Try increasing this value if you need large STACK_TRACE_HASH_SIZE. config LOCKDEP_CIRCULAR_QUEUE_BITS int "Bitsize for elements in circular_queue struct" @@ -1480,6 +1490,15 @@ config CSD_LOCK_WAIT_DEBUG include the IPI handler function currently executing (if any) and relevant stack traces. +config CSD_LOCK_WAIT_DEBUG_DEFAULT + bool "Default csd_lock_wait() debugging on at boot time" + depends on CSD_LOCK_WAIT_DEBUG + depends on 64BIT + default n + help + This option causes the csdlock_debug= kernel boot parameter to + default to 1 (basic debugging) instead of 0 (no debugging). + endmenu # lock debugging config TRACE_IRQFLAGS @@ -1958,9 +1977,21 @@ config FAIL_SUNRPC Provide fault-injection capability for SunRPC and its consumers. +config FAULT_INJECTION_CONFIGFS + bool "Configfs interface for fault-injection capabilities" + depends on FAULT_INJECTION + select CONFIGFS_FS + help + This option allows configfs-based drivers to dynamically configure + fault-injection via configfs. Each parameter for driver-specific + fault-injection can be made visible as a configfs attribute in a + configfs group. + + config FAULT_INJECTION_STACKTRACE_FILTER bool "stacktrace filter for fault-injection capabilities" - depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT + depends on FAULT_INJECTION + depends on (FAULT_INJECTION_DEBUG_FS || FAULT_INJECTION_CONFIGFS) && STACKTRACE_SUPPORT select STACKTRACE depends on FRAME_POINTER || MIPS || PPC || S390 || MICROBLAZE || ARM || ARC || X86 help diff --git a/lib/Makefile b/lib/Makefile index baf2821f7a00..876fcdeae34e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -47,7 +47,7 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \ list_sort.o uuid.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o rhashtable.o base64.o \ - once.o refcount.o usercopy.o errseq.o bucket_locks.o \ + once.o refcount.o rcuref.o usercopy.o errseq.o bucket_locks.o \ generic-radix-tree.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o @@ -231,6 +231,9 @@ lib-$(CONFIG_GENERIC_BUG) += bug.o obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o obj-$(CONFIG_DYNAMIC_DEBUG_CORE) += dynamic_debug.o +#ensure exported functions have prototypes +CFLAGS_dynamic_debug.o := -DDYNAMIC_DEBUG_MODULE + obj-$(CONFIG_SYMBOLIC_ERRNAME) += errname.o obj-$(CONFIG_NLATTR) += nlattr.o diff --git a/lib/btree.c b/lib/btree.c index a82100c73b55..49420cae3a83 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -794,4 +794,3 @@ module_exit(btree_module_exit); MODULE_AUTHOR("Joern Engel <joern@logfs.org>"); MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>"); -MODULE_LICENSE("GPL"); diff --git a/lib/buildid.c b/lib/buildid.c index dfc62625cae4..e3a7acdeef0e 100644 --- a/lib/buildid.c +++ b/lib/buildid.c @@ -163,7 +163,7 @@ out: /** * build_id_parse_buf - Get build ID from a buffer - * @buf: Elf note section(s) to parse + * @buf: ELF note section(s) to parse * @buf_size: Size of @buf in bytes * @build_id: Build ID parsed from @buf, at least BUILD_ID_SIZE_MAX long * diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c index f08d9c56f712..73c1636b927b 100644 --- a/lib/cpu_rmap.c +++ b/lib/cpu_rmap.c @@ -128,19 +128,31 @@ debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix) } #endif +static int get_free_index(struct cpu_rmap *rmap) +{ + int i; + + for (i = 0; i < rmap->size; i++) + if (!rmap->obj[i]) + return i; + + return -ENOSPC; +} + /** * cpu_rmap_add - add object to a rmap * @rmap: CPU rmap allocated with alloc_cpu_rmap() * @obj: Object to add to rmap * - * Return index of object. + * Return index of object or -ENOSPC if no free entry was found */ int cpu_rmap_add(struct cpu_rmap *rmap, void *obj) { - u16 index; + int index = get_free_index(rmap); + + if (index < 0) + return index; - BUG_ON(rmap->used >= rmap->size); - index = rmap->used++; rmap->obj[index] = obj; return index; } @@ -230,9 +242,10 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap) if (!rmap) return; - for (index = 0; index < rmap->used; index++) { + for (index = 0; index < rmap->size; index++) { glue = rmap->obj[index]; - irq_set_affinity_notifier(glue->notify.irq, NULL); + if (glue) + irq_set_affinity_notifier(glue->notify.irq, NULL); } cpu_rmap_put(rmap); @@ -268,10 +281,22 @@ static void irq_cpu_rmap_release(struct kref *ref) container_of(ref, struct irq_glue, notify.kref); cpu_rmap_put(glue->rmap); + glue->rmap->obj[glue->index] = NULL; kfree(glue); } /** + * irq_cpu_rmap_remove - remove an IRQ from a CPU affinity reverse-map + * @rmap: The reverse-map + * @irq: The IRQ number + */ +int irq_cpu_rmap_remove(struct cpu_rmap *rmap, int irq) +{ + return irq_set_affinity_notifier(irq, NULL); +} +EXPORT_SYMBOL(irq_cpu_rmap_remove); + +/** * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map * @rmap: The reverse-map * @irq: The IRQ number @@ -293,12 +318,22 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq) glue->notify.release = irq_cpu_rmap_release; glue->rmap = rmap; cpu_rmap_get(rmap); - glue->index = cpu_rmap_add(rmap, glue); + rc = cpu_rmap_add(rmap, glue); + if (rc < 0) + goto err_add; + + glue->index = rc; rc = irq_set_affinity_notifier(irq, &glue->notify); - if (rc) { - cpu_rmap_put(glue->rmap); - kfree(glue); - } + if (rc) + goto err_set; + + return rc; + +err_set: + rmap->obj[glue->index] = NULL; +err_add: + cpu_rmap_put(glue->rmap); + kfree(glue); return rc; } EXPORT_SYMBOL(irq_cpu_rmap_add); diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c index 75ccb3e633e6..3b6dcfdd9628 100644 --- a/lib/crypto/blake2s-generic.c +++ b/lib/crypto/blake2s-generic.c @@ -12,7 +12,6 @@ #include <linux/types.h> #include <linux/string.h> #include <linux/kernel.h> -#include <linux/module.h> #include <linux/init.h> #include <linux/bug.h> #include <asm/unaligned.h> @@ -109,7 +108,3 @@ void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, } EXPORT_SYMBOL(blake2s_compress_generic); - -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("BLAKE2s hash function"); -MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index 98e688c6d891..71a316552cc5 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -67,6 +67,5 @@ static int __init blake2s_mod_init(void) } module_init(blake2s_mod_init); -MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("BLAKE2s hash function"); MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); diff --git a/lib/crypto/utils.c b/lib/crypto/utils.c index 53230ab1b195..c852c7151b0a 100644 --- a/lib/crypto/utils.c +++ b/lib/crypto/utils.c @@ -6,7 +6,7 @@ */ #include <asm/unaligned.h> -#include <crypto/algapi.h> +#include <crypto/utils.h> #include <linux/module.h> /* diff --git a/lib/debugobjects.c b/lib/debugobjects.c index df86e649d8be..b796799fadb2 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -216,10 +216,6 @@ static struct debug_obj *__alloc_object(struct hlist_head *list) return obj; } -/* - * Allocate a new object. If the pool is empty, switch off the debugger. - * Must be called with interrupts disabled. - */ static struct debug_obj * alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr) { @@ -552,11 +548,49 @@ static void debug_object_is_on_stack(void *addr, int onstack) WARN_ON(1); } +static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket *b, + const struct debug_obj_descr *descr, + bool onstack, bool alloc_ifstatic) +{ + struct debug_obj *obj = lookup_object(addr, b); + enum debug_obj_state state = ODEBUG_STATE_NONE; + + if (likely(obj)) + return obj; + + /* + * debug_object_init() unconditionally allocates untracked + * objects. It does not matter whether it is a static object or + * not. + * + * debug_object_assert_init() and debug_object_activate() allow + * allocation only if the descriptor callback confirms that the + * object is static and considered initialized. For non-static + * objects the allocation needs to be done from the fixup callback. + */ + if (unlikely(alloc_ifstatic)) { + if (!descr->is_static_object || !descr->is_static_object(addr)) + return ERR_PTR(-ENOENT); + /* Statically allocated objects are considered initialized */ + state = ODEBUG_STATE_INIT; + } + + obj = alloc_object(addr, b, descr); + if (likely(obj)) { + obj->state = state; + debug_object_is_on_stack(addr, onstack); + return obj; + } + + /* Out of memory. Do the cleanup outside of the locked region */ + debug_objects_enabled = 0; + return NULL; +} + static void __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack) { enum debug_obj_state state; - bool check_stack = false; struct debug_bucket *db; struct debug_obj *obj; unsigned long flags; @@ -572,16 +606,11 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack raw_spin_lock_irqsave(&db->lock, flags); - obj = lookup_object(addr, db); - if (!obj) { - obj = alloc_object(addr, db, descr); - if (!obj) { - debug_objects_enabled = 0; - raw_spin_unlock_irqrestore(&db->lock, flags); - debug_objects_oom(); - return; - } - check_stack = true; + obj = lookup_object_or_alloc(addr, db, descr, onstack, false); + if (unlikely(!obj)) { + raw_spin_unlock_irqrestore(&db->lock, flags); + debug_objects_oom(); + return; } switch (obj->state) { @@ -607,8 +636,6 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack } raw_spin_unlock_irqrestore(&db->lock, flags); - if (check_stack) - debug_object_is_on_stack(addr, onstack); } /** @@ -648,14 +675,12 @@ EXPORT_SYMBOL_GPL(debug_object_init_on_stack); */ int debug_object_activate(void *addr, const struct debug_obj_descr *descr) { + struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr }; enum debug_obj_state state; struct debug_bucket *db; struct debug_obj *obj; unsigned long flags; int ret; - struct debug_obj o = { .object = addr, - .state = ODEBUG_STATE_NOTAVAILABLE, - .descr = descr }; if (!debug_objects_enabled) return 0; @@ -664,8 +689,8 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr) raw_spin_lock_irqsave(&db->lock, flags); - obj = lookup_object(addr, db); - if (obj) { + obj = lookup_object_or_alloc(addr, db, descr, false, true); + if (likely(!IS_ERR_OR_NULL(obj))) { bool print_object = false; switch (obj->state) { @@ -698,24 +723,16 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr) raw_spin_unlock_irqrestore(&db->lock, flags); - /* - * We are here when a static object is activated. We - * let the type specific code confirm whether this is - * true or not. if true, we just make sure that the - * static object is tracked in the object tracker. If - * not, this must be a bug, so we try to fix it up. - */ - if (descr->is_static_object && descr->is_static_object(addr)) { - /* track this static object */ - debug_object_init(addr, descr); - debug_object_activate(addr, descr); - } else { - debug_print_object(&o, "activate"); - ret = debug_object_fixup(descr->fixup_activate, addr, - ODEBUG_STATE_NOTAVAILABLE); - return ret ? 0 : -EINVAL; + /* If NULL the allocation has hit OOM */ + if (!obj) { + debug_objects_oom(); + return 0; } - return 0; + + /* Object is neither static nor tracked. It's not initialized */ + debug_print_object(&o, "activate"); + ret = debug_object_fixup(descr->fixup_activate, addr, ODEBUG_STATE_NOTAVAILABLE); + return ret ? 0 : -EINVAL; } EXPORT_SYMBOL_GPL(debug_object_activate); @@ -869,6 +886,7 @@ EXPORT_SYMBOL_GPL(debug_object_free); */ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr) { + struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr }; struct debug_bucket *db; struct debug_obj *obj; unsigned long flags; @@ -879,31 +897,20 @@ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr) db = get_bucket((unsigned long) addr); raw_spin_lock_irqsave(&db->lock, flags); + obj = lookup_object_or_alloc(addr, db, descr, false, true); + raw_spin_unlock_irqrestore(&db->lock, flags); + if (likely(!IS_ERR_OR_NULL(obj))) + return; - obj = lookup_object(addr, db); + /* If NULL the allocation has hit OOM */ if (!obj) { - struct debug_obj o = { .object = addr, - .state = ODEBUG_STATE_NOTAVAILABLE, - .descr = descr }; - - raw_spin_unlock_irqrestore(&db->lock, flags); - /* - * Maybe the object is static, and we let the type specific - * code confirm. Track this static object if true, else invoke - * fixup. - */ - if (descr->is_static_object && descr->is_static_object(addr)) { - /* Track this static object */ - debug_object_init(addr, descr); - } else { - debug_print_object(&o, "assert_init"); - debug_object_fixup(descr->fixup_assert_init, addr, - ODEBUG_STATE_NOTAVAILABLE); - } + debug_objects_oom(); return; } - raw_spin_unlock_irqrestore(&db->lock, flags); + /* Object is neither tracked nor static. It's not initialized. */ + debug_print_object(&o, "assert_init"); + debug_object_fixup(descr->fixup_assert_init, addr, ODEBUG_STATE_NOTAVAILABLE); } EXPORT_SYMBOL_GPL(debug_object_assert_init); diff --git a/lib/dhry_run.c b/lib/dhry_run.c index f9d33efa6d09..f15ac666e9d3 100644 --- a/lib/dhry_run.c +++ b/lib/dhry_run.c @@ -31,6 +31,7 @@ MODULE_PARM_DESC(iterations, static void dhry_benchmark(void) { + unsigned int cpu = get_cpu(); int i, n; if (iterations > 0) { @@ -45,9 +46,10 @@ static void dhry_benchmark(void) } report: + put_cpu(); if (n >= 0) - pr_info("CPU%u: Dhrystones per Second: %d (%d DMIPS)\n", - smp_processor_id(), n, n / DHRY_VAX); + pr_info("CPU%u: Dhrystones per Second: %d (%d DMIPS)\n", cpu, + n, n / DHRY_VAX); else if (n == -EAGAIN) pr_err("Please increase the number of iterations\n"); else diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 009f2ead09c1..fdd6d9800a70 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -1223,8 +1223,7 @@ static void ddebug_attach_module_classes(struct ddebug_table *dt, * Allocate a new ddebug_table for the given module * and add it to the global list. */ -static int __ddebug_add_module(struct _ddebug_info *di, unsigned int base, - const char *modname) +static int ddebug_add_module(struct _ddebug_info *di, const char *modname) { struct ddebug_table *dt; @@ -1263,11 +1262,6 @@ static int __ddebug_add_module(struct _ddebug_info *di, unsigned int base, return 0; } -int ddebug_add_module(struct _ddebug_info *di, const char *modname) -{ - return __ddebug_add_module(di, 0, modname); -} - /* helper for ddebug_dyndbg_(boot|module)_param_cb */ static int ddebug_dyndbg_param_cb(char *param, char *val, const char *modname, int on_err) @@ -1314,11 +1308,13 @@ static void ddebug_table_free(struct ddebug_table *dt) kfree(dt); } +#ifdef CONFIG_MODULES + /* * Called in response to a module being unloaded. Removes * any ddebug_table's which point at the module. */ -int ddebug_remove_module(const char *mod_name) +static int ddebug_remove_module(const char *mod_name) { struct ddebug_table *dt, *nextdt; int ret = -ENOENT; @@ -1337,6 +1333,33 @@ int ddebug_remove_module(const char *mod_name) return ret; } +static int ddebug_module_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct module *mod = data; + int ret = 0; + + switch (val) { + case MODULE_STATE_COMING: + ret = ddebug_add_module(&mod->dyndbg_info, mod->name); + if (ret) + WARN(1, "Failed to allocate memory: dyndbg may not work properly.\n"); + break; + case MODULE_STATE_GOING: + ddebug_remove_module(mod->name); + break; + } + + return notifier_from_errno(ret); +} + +static struct notifier_block ddebug_module_nb = { + .notifier_call = ddebug_module_notify, + .priority = 0, /* dynamic debug depends on jump label */ +}; + +#endif /* CONFIG_MODULES */ + static void ddebug_remove_all_tables(void) { mutex_lock(&ddebug_lock); @@ -1388,6 +1411,14 @@ static int __init dynamic_debug_init(void) .num_classes = __stop___dyndbg_classes - __start___dyndbg_classes, }; +#ifdef CONFIG_MODULES + ret = register_module_notifier(&ddebug_module_nb); + if (ret) { + pr_warn("Failed to register dynamic debug module notifier\n"); + return ret; + } +#endif /* CONFIG_MODULES */ + if (&__start___dyndbg == &__stop___dyndbg) { if (IS_ENABLED(CONFIG_DYNAMIC_DEBUG)) { pr_warn("_ddebug table is empty in a CONFIG_DYNAMIC_DEBUG build\n"); @@ -1408,7 +1439,7 @@ static int __init dynamic_debug_init(void) mod_ct++; di.num_descs = mod_sites; di.descs = iter_mod_start; - ret = __ddebug_add_module(&di, i - mod_sites, modname); + ret = ddebug_add_module(&di, modname); if (ret) goto out_err; @@ -1419,7 +1450,7 @@ static int __init dynamic_debug_init(void) } di.num_descs = mod_sites; di.descs = iter_mod_start; - ret = __ddebug_add_module(&di, i - mod_sites, modname); + ret = ddebug_add_module(&di, modname); if (ret) goto out_err; diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 6cff320c4eb4..d608f9b48c10 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -244,3 +244,194 @@ struct dentry *fault_create_debugfs_attr(const char *name, EXPORT_SYMBOL_GPL(fault_create_debugfs_attr); #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ + +#ifdef CONFIG_FAULT_INJECTION_CONFIGFS + +/* These configfs attribute utilities are copied from drivers/block/null_blk/main.c */ + +static ssize_t fault_uint_attr_show(unsigned int val, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", val); +} + +static ssize_t fault_ulong_attr_show(unsigned long val, char *page) +{ + return snprintf(page, PAGE_SIZE, "%lu\n", val); +} + +static ssize_t fault_bool_attr_show(bool val, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", val); +} + +static ssize_t fault_atomic_t_attr_show(atomic_t val, char *page) +{ + return snprintf(page, PAGE_SIZE, "%d\n", atomic_read(&val)); +} + +static ssize_t fault_uint_attr_store(unsigned int *val, const char *page, size_t count) +{ + unsigned int tmp; + int result; + + result = kstrtouint(page, 0, &tmp); + if (result < 0) + return result; + + *val = tmp; + return count; +} + +static ssize_t fault_ulong_attr_store(unsigned long *val, const char *page, size_t count) +{ + int result; + unsigned long tmp; + + result = kstrtoul(page, 0, &tmp); + if (result < 0) + return result; + + *val = tmp; + return count; +} + +static ssize_t fault_bool_attr_store(bool *val, const char *page, size_t count) +{ + bool tmp; + int result; + + result = kstrtobool(page, &tmp); + if (result < 0) + return result; + + *val = tmp; + return count; +} + +static ssize_t fault_atomic_t_attr_store(atomic_t *val, const char *page, size_t count) +{ + int tmp; + int result; + + result = kstrtoint(page, 0, &tmp); + if (result < 0) + return result; + + atomic_set(val, tmp); + return count; +} + +#define CONFIGFS_ATTR_NAMED(_pfx, _name, _attr_name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = _attr_name, \ + .ca_mode = 0644, \ + .ca_owner = THIS_MODULE, \ + .show = _pfx##_name##_show, \ + .store = _pfx##_name##_store, \ +} + +static struct fault_config *to_fault_config(struct config_item *item) +{ + return container_of(to_config_group(item), struct fault_config, group); +} + +#define FAULT_CONFIGFS_ATTR_NAMED(NAME, ATTR_NAME, MEMBER, TYPE) \ +static ssize_t fault_##NAME##_show(struct config_item *item, char *page) \ +{ \ + return fault_##TYPE##_attr_show(to_fault_config(item)->attr.MEMBER, page); \ +} \ +static ssize_t fault_##NAME##_store(struct config_item *item, const char *page, size_t count) \ +{ \ + struct fault_config *config = to_fault_config(item); \ + return fault_##TYPE##_attr_store(&config->attr.MEMBER, page, count); \ +} \ +CONFIGFS_ATTR_NAMED(fault_, NAME, ATTR_NAME) + +#define FAULT_CONFIGFS_ATTR(NAME, TYPE) \ + FAULT_CONFIGFS_ATTR_NAMED(NAME, __stringify(NAME), NAME, TYPE) + +FAULT_CONFIGFS_ATTR(probability, ulong); +FAULT_CONFIGFS_ATTR(interval, ulong); +FAULT_CONFIGFS_ATTR(times, atomic_t); +FAULT_CONFIGFS_ATTR(space, atomic_t); +FAULT_CONFIGFS_ATTR(verbose, ulong); +FAULT_CONFIGFS_ATTR_NAMED(ratelimit_interval, "verbose_ratelimit_interval_ms", + ratelimit_state.interval, uint); +FAULT_CONFIGFS_ATTR_NAMED(ratelimit_burst, "verbose_ratelimit_burst", + ratelimit_state.burst, uint); +FAULT_CONFIGFS_ATTR_NAMED(task_filter, "task-filter", task_filter, bool); + +#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER + +static ssize_t fault_stacktrace_depth_show(struct config_item *item, char *page) +{ + return fault_ulong_attr_show(to_fault_config(item)->attr.stacktrace_depth, page); +} + +static ssize_t fault_stacktrace_depth_store(struct config_item *item, const char *page, + size_t count) +{ + int result; + unsigned long tmp; + + result = kstrtoul(page, 0, &tmp); + if (result < 0) + return result; + + to_fault_config(item)->attr.stacktrace_depth = + min_t(unsigned long, tmp, MAX_STACK_TRACE_DEPTH); + + return count; +} + +CONFIGFS_ATTR_NAMED(fault_, stacktrace_depth, "stacktrace-depth"); + +static ssize_t fault_xul_attr_show(unsigned long val, char *page) +{ + return snprintf(page, PAGE_SIZE, + sizeof(val) == sizeof(u32) ? "0x%08lx\n" : "0x%016lx\n", val); +} + +static ssize_t fault_xul_attr_store(unsigned long *val, const char *page, size_t count) +{ + return fault_ulong_attr_store(val, page, count); +} + +FAULT_CONFIGFS_ATTR_NAMED(require_start, "require-start", require_start, xul); +FAULT_CONFIGFS_ATTR_NAMED(require_end, "require-end", require_end, xul); +FAULT_CONFIGFS_ATTR_NAMED(reject_start, "reject-start", reject_start, xul); +FAULT_CONFIGFS_ATTR_NAMED(reject_end, "reject-end", reject_end, xul); + +#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ + +static struct configfs_attribute *fault_config_attrs[] = { + &fault_attr_probability, + &fault_attr_interval, + &fault_attr_times, + &fault_attr_space, + &fault_attr_verbose, + &fault_attr_ratelimit_interval, + &fault_attr_ratelimit_burst, + &fault_attr_task_filter, +#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER + &fault_attr_stacktrace_depth, + &fault_attr_require_start, + &fault_attr_require_end, + &fault_attr_reject_start, + &fault_attr_reject_end, +#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ + NULL, +}; + +static const struct config_item_type fault_config_type = { + .ct_attrs = fault_config_attrs, + .ct_owner = THIS_MODULE, +}; + +void fault_config_init(struct fault_config *config, const char *name) +{ + config_group_init_type_name(&config->group, name, &fault_config_type); +} +EXPORT_SYMBOL_GPL(fault_config_init); + +#endif /* CONFIG_FAULT_INJECTION_CONFIGFS */ diff --git a/lib/find_bit.c b/lib/find_bit.c index c10920e66788..32f99e9a670e 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -182,6 +182,15 @@ unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned l EXPORT_SYMBOL(_find_next_andnot_bit); #endif +#ifndef find_next_or_bit +unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start) +{ + return FIND_NEXT_BIT(addr1[idx] | addr2[idx], /* nop */, nbits, start); +} +EXPORT_SYMBOL(_find_next_or_bit); +#endif + #ifndef find_next_zero_bit unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, unsigned long start) diff --git a/lib/group_cpus.c b/lib/group_cpus.c index 9c837a35fef7..aa3f6815bb12 100644 --- a/lib/group_cpus.c +++ b/lib/group_cpus.c @@ -426,3 +426,4 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) return masks; } #endif /* CONFIG_SMP */ +EXPORT_SYMBOL_GPL(group_cpus_evenly); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 274014e4eafe..c3dbe994112c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -126,13 +126,13 @@ __out: \ iterate_buf(i, n, base, len, off, \ i->ubuf, (I)) \ } else if (likely(iter_is_iovec(i))) { \ - const struct iovec *iov = i->iov; \ + const struct iovec *iov = iter_iov(i); \ void __user *base; \ size_t len; \ iterate_iovec(i, n, base, len, off, \ iov, (I)) \ - i->nr_segs -= iov - i->iov; \ - i->iov = iov; \ + i->nr_segs -= iov - iter_iov(i); \ + i->__iov = iov; \ } else if (iov_iter_is_bvec(i)) { \ const struct bio_vec *bvec = i->bvec; \ void *base; \ @@ -172,6 +172,18 @@ static int copyout(void __user *to, const void *from, size_t n) return n; } +static int copyout_nofault(void __user *to, const void *from, size_t n) +{ + long res; + + if (should_fail_usercopy()) + return n; + + res = copy_to_user_nofault(to, from, n); + + return res < 0 ? n : res; +} + static int copyin(void *to, const void __user *from, size_t n) { size_t res = n; @@ -355,7 +367,7 @@ size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size) size_t skip; size -= count; - for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) { + for (p = iter_iov(i), skip = i->iov_offset; count; p++, skip = 0) { size_t len = min(count, p->iov_len - skip); size_t ret; @@ -398,7 +410,7 @@ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size) size_t skip; size -= count; - for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) { + for (p = iter_iov(i), skip = i->iov_offset; count; p++, skip = 0) { size_t len = min(count, p->iov_len - skip); size_t ret; @@ -425,7 +437,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, .nofault = false, .user_backed = true, .data_source = direction, - .iov = iov, + .__iov = iov, .nr_segs = nr_segs, .iov_offset = 0, .count = count @@ -734,6 +746,42 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, } EXPORT_SYMBOL(copy_page_to_iter); +size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes, + struct iov_iter *i) +{ + size_t res = 0; + + if (!page_copy_sane(page, offset, bytes)) + return 0; + if (WARN_ON_ONCE(i->data_source)) + return 0; + if (unlikely(iov_iter_is_pipe(i))) + return copy_page_to_iter_pipe(page, offset, bytes, i); + page += offset / PAGE_SIZE; // first subpage + offset %= PAGE_SIZE; + while (1) { + void *kaddr = kmap_local_page(page); + size_t n = min(bytes, (size_t)PAGE_SIZE - offset); + + iterate_and_advance(i, n, base, len, off, + copyout_nofault(base, kaddr + offset + off, len), + memcpy(base, kaddr + offset + off, len) + ) + kunmap_local(kaddr); + res += n; + bytes -= n; + if (!bytes || !n) + break; + offset += n; + if (offset == PAGE_SIZE) { + page++; + offset = 0; + } + } + return res; +} +EXPORT_SYMBOL(copy_page_to_iter_nofault); + size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { @@ -876,14 +924,14 @@ static void iov_iter_iovec_advance(struct iov_iter *i, size_t size) i->count -= size; size += i->iov_offset; // from beginning of current segment - for (iov = i->iov, end = iov + i->nr_segs; iov < end; iov++) { + for (iov = iter_iov(i), end = iov + i->nr_segs; iov < end; iov++) { if (likely(size < iov->iov_len)) break; size -= iov->iov_len; } i->iov_offset = size; - i->nr_segs -= iov - i->iov; - i->iov = iov; + i->nr_segs -= iov - iter_iov(i); + i->__iov = iov; } void iov_iter_advance(struct iov_iter *i, size_t size) @@ -958,12 +1006,12 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) unroll -= n; } } else { /* same logics for iovec and kvec */ - const struct iovec *iov = i->iov; + const struct iovec *iov = iter_iov(i); while (1) { size_t n = (--iov)->iov_len; i->nr_segs++; if (unroll <= n) { - i->iov = iov; + i->__iov = iov; i->iov_offset = n - unroll; return; } @@ -980,7 +1028,7 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i) { if (i->nr_segs > 1) { if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) - return min(i->count, i->iov->iov_len - i->iov_offset); + return min(i->count, iter_iov(i)->iov_len - i->iov_offset); if (iov_iter_is_bvec(i)) return min(i->count, i->bvec->bv_len - i->iov_offset); } @@ -1095,13 +1143,14 @@ static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask, unsigned k; for (k = 0; k < i->nr_segs; k++, skip = 0) { - size_t len = i->iov[k].iov_len - skip; + const struct iovec *iov = iter_iov(i) + k; + size_t len = iov->iov_len - skip; if (len > size) len = size; if (len & len_mask) return false; - if ((unsigned long)(i->iov[k].iov_base + skip) & addr_mask) + if ((unsigned long)(iov->iov_base + skip) & addr_mask) return false; size -= len; @@ -1194,9 +1243,10 @@ static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i) unsigned k; for (k = 0; k < i->nr_segs; k++, skip = 0) { - size_t len = i->iov[k].iov_len - skip; + const struct iovec *iov = iter_iov(i) + k; + size_t len = iov->iov_len - skip; if (len) { - res |= (unsigned long)i->iov[k].iov_base + skip; + res |= (unsigned long)iov->iov_base + skip; if (len > size) len = size; res |= len; @@ -1273,14 +1323,15 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) return ~0U; for (k = 0; k < i->nr_segs; k++) { - if (i->iov[k].iov_len) { - unsigned long base = (unsigned long)i->iov[k].iov_base; + const struct iovec *iov = iter_iov(i) + k; + if (iov->iov_len) { + unsigned long base = (unsigned long)iov->iov_base; if (v) // if not the first one res |= base | v; // this start | previous end - v = base + i->iov[k].iov_len; - if (size <= i->iov[k].iov_len) + v = base + iov->iov_len; + if (size <= iov->iov_len) break; - size -= i->iov[k].iov_len; + size -= iov->iov_len; } } return res; @@ -1396,13 +1447,14 @@ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) return (unsigned long)i->ubuf + i->iov_offset; for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { - size_t len = i->iov[k].iov_len - skip; + const struct iovec *iov = iter_iov(i) + k; + size_t len = iov->iov_len - skip; if (unlikely(!len)) continue; if (*size > len) *size = len; - return (unsigned long)i->iov[k].iov_base + skip; + return (unsigned long)iov->iov_base + skip; } BUG(); // if it had been empty, we wouldn't get called } @@ -1614,7 +1666,7 @@ static int iov_npages(const struct iov_iter *i, int maxpages) const struct iovec *p; int npages = 0; - for (p = i->iov; size; skip = 0, p++) { + for (p = iter_iov(i); size; skip = 0, p++) { unsigned offs = offset_in_page(p->iov_base + skip); size_t len = min(p->iov_len - skip, size); @@ -1691,14 +1743,14 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) flags); else if (iov_iter_is_kvec(new) || iter_is_iovec(new)) /* iovec and kvec have identical layout */ - return new->iov = kmemdup(new->iov, + return new->__iov = kmemdup(new->__iov, new->nr_segs * sizeof(struct iovec), flags); return NULL; } EXPORT_SYMBOL(dup_iter); -static int copy_compat_iovec_from_user(struct iovec *iov, +static __noclone int copy_compat_iovec_from_user(struct iovec *iov, const struct iovec __user *uvec, unsigned long nr_segs) { const struct compat_iovec __user *uiov = @@ -1731,18 +1783,35 @@ uaccess_end: } static int copy_iovec_from_user(struct iovec *iov, - const struct iovec __user *uvec, unsigned long nr_segs) + const struct iovec __user *uiov, unsigned long nr_segs) { - unsigned long seg; + int ret = -EFAULT; - if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec))) + if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) return -EFAULT; - for (seg = 0; seg < nr_segs; seg++) { - if ((ssize_t)iov[seg].iov_len < 0) - return -EINVAL; - } - return 0; + do { + void __user *buf; + ssize_t len; + + unsafe_get_user(len, &uiov->iov_len, uaccess_end); + unsafe_get_user(buf, &uiov->iov_base, uaccess_end); + + /* check for size_t not fitting in ssize_t .. */ + if (unlikely(len < 0)) { + ret = -EINVAL; + goto uaccess_end; + } + iov->iov_base = buf; + iov->iov_len = len; + + uiov++; iov++; + } while (--nr_segs); + + ret = 0; +uaccess_end: + user_access_end(); + return ret; } struct iovec *iovec_from_user(const struct iovec __user *uvec, @@ -1767,7 +1836,7 @@ struct iovec *iovec_from_user(const struct iovec __user *uvec, return ERR_PTR(-ENOMEM); } - if (compat) + if (unlikely(compat)) ret = copy_compat_iovec_from_user(iov, uvec, nr_segs); else ret = copy_iovec_from_user(iov, uvec, nr_segs); @@ -1780,6 +1849,30 @@ struct iovec *iovec_from_user(const struct iovec __user *uvec, return iov; } +/* + * Single segment iovec supplied by the user, import it as ITER_UBUF. + */ +static ssize_t __import_iovec_ubuf(int type, const struct iovec __user *uvec, + struct iovec **iovp, struct iov_iter *i, + bool compat) +{ + struct iovec *iov = *iovp; + ssize_t ret; + + if (compat) + ret = copy_compat_iovec_from_user(iov, uvec, 1); + else + ret = copy_iovec_from_user(iov, uvec, 1); + if (unlikely(ret)) + return ret; + + ret = import_ubuf(type, iov->iov_base, iov->iov_len, i); + if (unlikely(ret)) + return ret; + *iovp = NULL; + return i->count; +} + ssize_t __import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i, bool compat) @@ -1788,6 +1881,9 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec, unsigned long seg; struct iovec *iov; + if (nr_segs == 1) + return __import_iovec_ubuf(type, uvec, iovp, i, compat); + iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat); if (IS_ERR(iov)) { *iovp = NULL; @@ -1866,9 +1962,7 @@ int import_single_range(int rw, void __user *buf, size_t len, if (unlikely(!access_ok(buf, len))) return -EFAULT; - iov->iov_base = buf; - iov->iov_len = len; - iov_iter_init(i, rw, iov, 1, len); + iov_iter_ubuf(i, rw, buf, len); return 0; } EXPORT_SYMBOL(import_single_range); @@ -1918,7 +2012,7 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) if (iov_iter_is_bvec(i)) i->bvec -= state->nr_segs - i->nr_segs; else - i->iov -= state->nr_segs - i->nr_segs; + i->__iov -= state->nr_segs - i->nr_segs; i->nr_segs = state->nr_segs; } diff --git a/lib/kobject.c b/lib/kobject.c index 6e2f0bee3560..f79a434e1231 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -10,6 +10,8 @@ * about using the kobject interface. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kobject.h> #include <linux/string.h> #include <linux/export.h> @@ -127,7 +129,7 @@ static int fill_kobj_path(const struct kobject *kobj, char *path, int length) *(path + --length) = '/'; } - pr_debug("kobject: '%s' (%p): %s: path = '%s'\n", kobject_name(kobj), + pr_debug("'%s' (%p): %s: path = '%s'\n", kobject_name(kobj), kobj, __func__, path); return 0; @@ -223,7 +225,7 @@ static int kobject_add_internal(struct kobject *kobj) kobj->parent = parent; } - pr_debug("kobject: '%s' (%p): %s: parent: '%s', set: '%s'\n", + pr_debug("'%s' (%p): %s: parent: '%s', set: '%s'\n", kobject_name(kobj), kobj, __func__, parent ? kobject_name(parent) : "<NULL>", kobj->kset ? kobject_name(&kobj->kset->kobj) : "<NULL>"); @@ -338,7 +340,7 @@ void kobject_init(struct kobject *kobj, const struct kobj_type *ktype) /* do not error out as sometimes we can recover */ pr_err("kobject (%p): tried to init an initialized object, something is seriously wrong.\n", kobj); - dump_stack(); + dump_stack_lvl(KERN_ERR); } kobject_init_internal(kobj); @@ -347,7 +349,7 @@ void kobject_init(struct kobject *kobj, const struct kobj_type *ktype) error: pr_err("kobject (%p): %s\n", kobj, err_str); - dump_stack(); + dump_stack_lvl(KERN_ERR); } EXPORT_SYMBOL(kobject_init); @@ -359,7 +361,7 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, retval = kobject_set_name_vargs(kobj, fmt, vargs); if (retval) { - pr_err("kobject: can not set name properly!\n"); + pr_err("can not set name properly!\n"); return retval; } kobj->parent = parent; @@ -411,7 +413,7 @@ int kobject_add(struct kobject *kobj, struct kobject *parent, if (!kobj->state_initialized) { pr_err("kobject '%s' (%p): tried to add an uninitialized object, something is seriously wrong.\n", kobject_name(kobj), kobj); - dump_stack(); + dump_stack_lvl(KERN_ERR); return -EINVAL; } va_start(args, fmt); @@ -588,7 +590,7 @@ static void __kobject_del(struct kobject *kobj) /* send "remove" if the caller did not do it but sent "add" */ if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) { - pr_debug("kobject: '%s' (%p): auto cleanup 'remove' event\n", + pr_debug("'%s' (%p): auto cleanup 'remove' event\n", kobject_name(kobj), kobj); kobject_uevent(kobj, KOBJ_REMOVE); } @@ -658,16 +660,16 @@ static void kobject_cleanup(struct kobject *kobj) const struct kobj_type *t = get_ktype(kobj); const char *name = kobj->name; - pr_debug("kobject: '%s' (%p): %s, parent %p\n", + pr_debug("'%s' (%p): %s, parent %p\n", kobject_name(kobj), kobj, __func__, kobj->parent); if (t && !t->release) - pr_debug("kobject: '%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", + pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", kobject_name(kobj), kobj); /* remove from sysfs if the caller did not do it */ if (kobj->state_in_sysfs) { - pr_debug("kobject: '%s' (%p): auto cleanup kobject_del\n", + pr_debug("'%s' (%p): auto cleanup kobject_del\n", kobject_name(kobj), kobj); __kobject_del(kobj); } else { @@ -676,14 +678,14 @@ static void kobject_cleanup(struct kobject *kobj) } if (t && t->release) { - pr_debug("kobject: '%s' (%p): calling ktype release\n", + pr_debug("'%s' (%p): calling ktype release\n", kobject_name(kobj), kobj); t->release(kobj); } /* free name if we allocated it */ if (name) { - pr_debug("kobject: '%s': free name\n", name); + pr_debug("'%s': free name\n", name); kfree_const(name); } @@ -703,8 +705,8 @@ static void kobject_release(struct kref *kref) struct kobject *kobj = container_of(kref, struct kobject, kref); #ifdef CONFIG_DEBUG_KOBJECT_RELEASE unsigned long delay = HZ + HZ * get_random_u32_below(4); - pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n", - kobject_name(kobj), kobj, __func__, kobj->parent, delay); + pr_info("'%s' (%p): %s, parent %p (delayed %ld)\n", + kobject_name(kobj), kobj, __func__, kobj->parent, delay); INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); schedule_delayed_work(&kobj->release, delay); @@ -733,7 +735,7 @@ EXPORT_SYMBOL(kobject_put); static void dynamic_kobj_release(struct kobject *kobj) { - pr_debug("kobject: (%p): %s\n", kobj, __func__); + pr_debug("(%p): %s\n", kobj, __func__); kfree(kobj); } @@ -910,7 +912,7 @@ EXPORT_SYMBOL_GPL(kset_find_obj); static void kset_release(struct kobject *kobj) { struct kset *kset = container_of(kobj, struct kset, kobj); - pr_debug("kobject: '%s' (%p): %s\n", + pr_debug("'%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); kfree(kset); } diff --git a/lib/kunit/debugfs.c b/lib/kunit/debugfs.c index de0ee2e03ed6..b08bb1fba106 100644 --- a/lib/kunit/debugfs.c +++ b/lib/kunit/debugfs.c @@ -55,14 +55,24 @@ static int debugfs_print_results(struct seq_file *seq, void *v) enum kunit_status success = kunit_suite_has_succeeded(suite); struct kunit_case *test_case; - if (!suite || !suite->log) + if (!suite) return 0; - seq_printf(seq, "%s", suite->log); + /* Print KTAP header so the debugfs log can be parsed as valid KTAP. */ + seq_puts(seq, "KTAP version 1\n"); + seq_puts(seq, "1..1\n"); + + /* Print suite header because it is not stored in the test logs. */ + seq_puts(seq, KUNIT_SUBTEST_INDENT "KTAP version 1\n"); + seq_printf(seq, KUNIT_SUBTEST_INDENT "# Subtest: %s\n", suite->name); + seq_printf(seq, KUNIT_SUBTEST_INDENT "1..%zd\n", kunit_suite_num_test_cases(suite)); kunit_suite_for_each_test_case(suite, test_case) debugfs_print_result(seq, suite, test_case); + if (suite->log) + seq_printf(seq, "%s", suite->log); + seq_printf(seq, "%s %d %s\n", kunit_status_to_ok_not_ok(success), 1, suite->name); return 0; diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index 4df0335d0d06..42e44caa1bdd 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -6,6 +6,7 @@ * Author: Brendan Higgins <brendanhiggins@google.com> */ #include <kunit/test.h> +#include <kunit/test-bug.h> #include "try-catch-impl.h" @@ -443,18 +444,6 @@ static struct kunit_suite kunit_resource_test_suite = { .test_cases = kunit_resource_test_cases, }; -static void kunit_log_test(struct kunit *test); - -static struct kunit_case kunit_log_test_cases[] = { - KUNIT_CASE(kunit_log_test), - {} -}; - -static struct kunit_suite kunit_log_test_suite = { - .name = "kunit-log-test", - .test_cases = kunit_log_test_cases, -}; - static void kunit_log_test(struct kunit *test) { struct kunit_suite suite; @@ -481,6 +470,29 @@ static void kunit_log_test(struct kunit *test) #endif } +static void kunit_log_newline_test(struct kunit *test) +{ + kunit_info(test, "Add newline\n"); + if (test->log) { + KUNIT_ASSERT_NOT_NULL_MSG(test, strstr(test->log, "Add newline\n"), + "Missing log line, full log:\n%s", test->log); + KUNIT_EXPECT_NULL(test, strstr(test->log, "Add newline\n\n")); + } else { + kunit_skip(test, "only useful when debugfs is enabled"); + } +} + +static struct kunit_case kunit_log_test_cases[] = { + KUNIT_CASE(kunit_log_test), + KUNIT_CASE(kunit_log_newline_test), + {} +}; + +static struct kunit_suite kunit_log_test_suite = { + .name = "kunit-log-test", + .test_cases = kunit_log_test_cases, +}; + static void kunit_status_set_failure_test(struct kunit *test) { struct kunit fake; @@ -521,7 +533,46 @@ static struct kunit_suite kunit_status_test_suite = { .test_cases = kunit_status_test_cases, }; +static void kunit_current_test(struct kunit *test) +{ + /* Check results of both current->kunit_test and + * kunit_get_current_test() are equivalent to current test. + */ + KUNIT_EXPECT_PTR_EQ(test, test, current->kunit_test); + KUNIT_EXPECT_PTR_EQ(test, test, kunit_get_current_test()); +} + +static void kunit_current_fail_test(struct kunit *test) +{ + struct kunit fake; + + kunit_init_test(&fake, "fake test", NULL); + KUNIT_EXPECT_EQ(test, fake.status, KUNIT_SUCCESS); + + /* Set current->kunit_test to fake test. */ + current->kunit_test = &fake; + + kunit_fail_current_test("This should make `fake` test fail."); + KUNIT_EXPECT_EQ(test, fake.status, (enum kunit_status)KUNIT_FAILURE); + kunit_cleanup(&fake); + + /* Reset current->kunit_test to current test. */ + current->kunit_test = test; +} + +static struct kunit_case kunit_current_test_cases[] = { + KUNIT_CASE(kunit_current_test), + KUNIT_CASE(kunit_current_fail_test), + {} +}; + +static struct kunit_suite kunit_current_test_suite = { + .name = "kunit_current", + .test_cases = kunit_current_test_cases, +}; + kunit_test_suites(&kunit_try_catch_test_suite, &kunit_resource_test_suite, - &kunit_log_test_suite, &kunit_status_test_suite); + &kunit_log_test_suite, &kunit_status_test_suite, + &kunit_current_test_suite); MODULE_LICENSE("GPL v2"); diff --git a/lib/kunit/test.c b/lib/kunit/test.c index c9e15bb60058..e2910b261112 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -108,28 +108,51 @@ static void kunit_print_test_stats(struct kunit *test, stats.total); } +/** + * kunit_log_newline() - Add newline to the end of log if one is not + * already present. + * @log: The log to add the newline to. + */ +static void kunit_log_newline(char *log) +{ + int log_len, len_left; + + log_len = strlen(log); + len_left = KUNIT_LOG_SIZE - log_len - 1; + + if (log_len > 0 && log[log_len - 1] != '\n') + strncat(log, "\n", len_left); +} + /* * Append formatted message to log, size of which is limited to * KUNIT_LOG_SIZE bytes (including null terminating byte). */ void kunit_log_append(char *log, const char *fmt, ...) { - char line[KUNIT_LOG_SIZE]; va_list args; - int len_left; + int len, log_len, len_left; if (!log) return; - len_left = KUNIT_LOG_SIZE - strlen(log) - 1; + log_len = strlen(log); + len_left = KUNIT_LOG_SIZE - log_len - 1; if (len_left <= 0) return; + /* Evaluate length of line to add to log */ + va_start(args, fmt); + len = vsnprintf(NULL, 0, fmt, args) + 1; + va_end(args); + + /* Print formatted line to the log */ va_start(args, fmt); - vsnprintf(line, sizeof(line), fmt, args); + vsnprintf(log + log_len, min(len, len_left), fmt, args); va_end(args); - strncat(log, line, len_left); + /* Add newline to end of log if not already present. */ + kunit_log_newline(log); } EXPORT_SYMBOL_GPL(kunit_log_append); @@ -147,10 +170,18 @@ EXPORT_SYMBOL_GPL(kunit_suite_num_test_cases); static void kunit_print_suite_start(struct kunit_suite *suite) { - kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "KTAP version 1\n"); - kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "# Subtest: %s", + /* + * We do not log the test suite header as doing so would + * mean debugfs display would consist of the test suite + * header prior to individual test results. + * Hence directly printk the suite status, and we will + * separately seq_printf() the suite header for the debugfs + * representation. + */ + pr_info(KUNIT_SUBTEST_INDENT "KTAP version 1\n"); + pr_info(KUNIT_SUBTEST_INDENT "# Subtest: %s\n", suite->name); - kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "1..%zd", + pr_info(KUNIT_SUBTEST_INDENT "1..%zd\n", kunit_suite_num_test_cases(suite)); } @@ -167,10 +198,9 @@ static void kunit_print_ok_not_ok(void *test_or_suite, /* * We do not log the test suite results as doing so would - * mean debugfs display would consist of the test suite - * description and status prior to individual test results. - * Hence directly printk the suite status, and we will - * separately seq_printf() the suite status for the debugfs + * mean debugfs display would consist of an incorrect test + * number. Hence directly printk the suite result, and we will + * separately seq_printf() the suite results for the debugfs * representation. */ if (suite) @@ -437,7 +467,6 @@ static void kunit_run_case_catch_errors(struct kunit_suite *suite, struct kunit_try_catch_context context; struct kunit_try_catch *try_catch; - kunit_init_test(test, test_case->name, test_case->log); try_catch = &test->try_catch; kunit_try_catch_init(try_catch, @@ -533,6 +562,8 @@ int kunit_run_tests(struct kunit_suite *suite) struct kunit_result_stats param_stats = { 0 }; test_case->status = KUNIT_SKIPPED; + kunit_init_test(&test, test_case->name, test_case->log); + if (!test_case->generate_params) { /* Non-parameterised test. */ kunit_run_case_catch_errors(suite, test_case, &test); diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index 5ca0d815a95d..649e687413a0 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -65,12 +65,6 @@ static void __exit libcrc32c_mod_fini(void) crypto_free_shash(tfm); } -const char *crc32c_impl(void) -{ - return crypto_shash_driver_name(tfm); -} -EXPORT_SYMBOL(crc32c_impl); - module_init(libcrc32c_mod_init); module_exit(libcrc32c_mod_fini); diff --git a/lib/list-test.c b/lib/list-test.c index d374cf5d1a57..0cc27de9cec8 100644 --- a/lib/list-test.c +++ b/lib/list-test.c @@ -8,6 +8,7 @@ #include <kunit/test.h> #include <linux/list.h> +#include <linux/klist.h> struct list_test_struct { int data; @@ -1199,6 +1200,303 @@ static struct kunit_suite hlist_test_module = { .test_cases = hlist_test_cases, }; -kunit_test_suites(&list_test_module, &hlist_test_module); + +struct klist_test_struct { + int data; + struct klist klist; + struct klist_node klist_node; +}; + +static int node_count; +static struct klist_node *last_node; + +static void check_node(struct klist_node *node_ptr) +{ + node_count++; + last_node = node_ptr; +} + +static void check_delete_node(struct klist_node *node_ptr) +{ + node_count--; + last_node = node_ptr; +} + +static void klist_test_add_tail(struct kunit *test) +{ + struct klist_node a, b; + struct klist mylist; + struct klist_iter i; + + node_count = 0; + klist_init(&mylist, &check_node, NULL); + + klist_add_tail(&a, &mylist); + KUNIT_EXPECT_EQ(test, node_count, 1); + KUNIT_EXPECT_PTR_EQ(test, last_node, &a); + + klist_add_tail(&b, &mylist); + KUNIT_EXPECT_EQ(test, node_count, 2); + KUNIT_EXPECT_PTR_EQ(test, last_node, &b); + + /* should be [list] -> a -> b */ + klist_iter_init(&mylist, &i); + + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &a); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &b); + KUNIT_EXPECT_NULL(test, klist_next(&i)); + + klist_iter_exit(&i); + +} + +static void klist_test_add_head(struct kunit *test) +{ + struct klist_node a, b; + struct klist mylist; + struct klist_iter i; + + node_count = 0; + klist_init(&mylist, &check_node, NULL); + + klist_add_head(&a, &mylist); + KUNIT_EXPECT_EQ(test, node_count, 1); + KUNIT_EXPECT_PTR_EQ(test, last_node, &a); + + klist_add_head(&b, &mylist); + KUNIT_EXPECT_EQ(test, node_count, 2); + KUNIT_EXPECT_PTR_EQ(test, last_node, &b); + + /* should be [list] -> b -> a */ + klist_iter_init(&mylist, &i); + + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &b); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &a); + KUNIT_EXPECT_NULL(test, klist_next(&i)); + + klist_iter_exit(&i); + +} + +static void klist_test_add_behind(struct kunit *test) +{ + struct klist_node a, b, c, d; + struct klist mylist; + struct klist_iter i; + + node_count = 0; + klist_init(&mylist, &check_node, NULL); + + klist_add_head(&a, &mylist); + klist_add_head(&b, &mylist); + + klist_add_behind(&c, &a); + KUNIT_EXPECT_EQ(test, node_count, 3); + KUNIT_EXPECT_PTR_EQ(test, last_node, &c); + + klist_add_behind(&d, &b); + KUNIT_EXPECT_EQ(test, node_count, 4); + KUNIT_EXPECT_PTR_EQ(test, last_node, &d); + + klist_iter_init(&mylist, &i); + + /* should be [list] -> b -> d -> a -> c*/ + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &b); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &d); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &a); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &c); + KUNIT_EXPECT_NULL(test, klist_next(&i)); + + klist_iter_exit(&i); + +} + +static void klist_test_add_before(struct kunit *test) +{ + struct klist_node a, b, c, d; + struct klist mylist; + struct klist_iter i; + + node_count = 0; + klist_init(&mylist, &check_node, NULL); + + klist_add_head(&a, &mylist); + klist_add_head(&b, &mylist); + klist_add_before(&c, &a); + KUNIT_EXPECT_EQ(test, node_count, 3); + KUNIT_EXPECT_PTR_EQ(test, last_node, &c); + + klist_add_before(&d, &b); + KUNIT_EXPECT_EQ(test, node_count, 4); + KUNIT_EXPECT_PTR_EQ(test, last_node, &d); + + klist_iter_init(&mylist, &i); + + /* should be [list] -> b -> d -> a -> c*/ + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &d); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &b); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &c); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &a); + KUNIT_EXPECT_NULL(test, klist_next(&i)); + + klist_iter_exit(&i); + +} + +/* + * Verify that klist_del() delays the deletion of a node until there + * are no other references to it + */ +static void klist_test_del_refcount_greater_than_zero(struct kunit *test) +{ + struct klist_node a, b, c, d; + struct klist mylist; + struct klist_iter i; + + node_count = 0; + klist_init(&mylist, &check_node, &check_delete_node); + + /* Add nodes a,b,c,d to the list*/ + klist_add_tail(&a, &mylist); + klist_add_tail(&b, &mylist); + klist_add_tail(&c, &mylist); + klist_add_tail(&d, &mylist); + + klist_iter_init(&mylist, &i); + + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &a); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &b); + /* Advance the iterator to point to node c*/ + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &c); + + /* Try to delete node c while there is a reference to it*/ + klist_del(&c); + + /* + * Verify that node c is still attached to the list even after being + * deleted. Since the iterator still points to c, the reference count is not + * decreased to 0 + */ + KUNIT_EXPECT_TRUE(test, klist_node_attached(&c)); + + /* Check that node c has not been removed yet*/ + KUNIT_EXPECT_EQ(test, node_count, 4); + KUNIT_EXPECT_PTR_EQ(test, last_node, &d); + + klist_iter_exit(&i); + + /* + * Since the iterator is no longer pointing to node c, node c is removed + * from the list + */ + KUNIT_EXPECT_EQ(test, node_count, 3); + KUNIT_EXPECT_PTR_EQ(test, last_node, &c); + +} + +/* + * Verify that klist_del() deletes a node immediately when there are no + * other references to it. + */ +static void klist_test_del_refcount_zero(struct kunit *test) +{ + struct klist_node a, b, c, d; + struct klist mylist; + struct klist_iter i; + + node_count = 0; + klist_init(&mylist, &check_node, &check_delete_node); + + /* Add nodes a,b,c,d to the list*/ + klist_add_tail(&a, &mylist); + klist_add_tail(&b, &mylist); + klist_add_tail(&c, &mylist); + klist_add_tail(&d, &mylist); + /* Delete node c*/ + klist_del(&c); + + /* Check that node c is deleted from the list*/ + KUNIT_EXPECT_EQ(test, node_count, 3); + KUNIT_EXPECT_PTR_EQ(test, last_node, &c); + + /* Should be [list] -> a -> b -> d*/ + klist_iter_init(&mylist, &i); + + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &a); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &b); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &d); + KUNIT_EXPECT_NULL(test, klist_next(&i)); + + klist_iter_exit(&i); + +} + +static void klist_test_remove(struct kunit *test) +{ + /* This test doesn't check correctness under concurrent access */ + struct klist_node a, b, c, d; + struct klist mylist; + struct klist_iter i; + + node_count = 0; + klist_init(&mylist, &check_node, &check_delete_node); + + /* Add nodes a,b,c,d to the list*/ + klist_add_tail(&a, &mylist); + klist_add_tail(&b, &mylist); + klist_add_tail(&c, &mylist); + klist_add_tail(&d, &mylist); + /* Delete node c*/ + klist_remove(&c); + + /* Check the nodes in the list*/ + KUNIT_EXPECT_EQ(test, node_count, 3); + KUNIT_EXPECT_PTR_EQ(test, last_node, &c); + + /* should be [list] -> a -> b -> d*/ + klist_iter_init(&mylist, &i); + + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &a); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &b); + KUNIT_EXPECT_PTR_EQ(test, klist_next(&i), &d); + KUNIT_EXPECT_NULL(test, klist_next(&i)); + + klist_iter_exit(&i); + +} + +static void klist_test_node_attached(struct kunit *test) +{ + struct klist_node a = {}; + struct klist mylist; + + klist_init(&mylist, NULL, NULL); + + KUNIT_EXPECT_FALSE(test, klist_node_attached(&a)); + klist_add_head(&a, &mylist); + KUNIT_EXPECT_TRUE(test, klist_node_attached(&a)); + klist_del(&a); + KUNIT_EXPECT_FALSE(test, klist_node_attached(&a)); + +} + +static struct kunit_case klist_test_cases[] = { + KUNIT_CASE(klist_test_add_tail), + KUNIT_CASE(klist_test_add_head), + KUNIT_CASE(klist_test_add_behind), + KUNIT_CASE(klist_test_add_before), + KUNIT_CASE(klist_test_del_refcount_greater_than_zero), + KUNIT_CASE(klist_test_del_refcount_zero), + KUNIT_CASE(klist_test_remove), + KUNIT_CASE(klist_test_node_attached), + {}, +}; + +static struct kunit_suite klist_test_module = { + .name = "klist", + .test_cases = klist_test_cases, +}; + +kunit_test_suites(&list_test_module, &hlist_test_module, &klist_test_module); MODULE_LICENSE("GPL v2"); diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 646297cae5d1..110a36479dce 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -185,7 +185,7 @@ static void mt_free_rcu(struct rcu_head *head) */ static void ma_free_rcu(struct maple_node *node) { - node->parent = ma_parent_ptr(node); + WARN_ON(node->parent != ma_parent_ptr(node)); call_rcu(&node->rcu, mt_free_rcu); } @@ -539,11 +539,14 @@ static inline struct maple_node *mte_parent(const struct maple_enode *enode) */ static inline bool ma_dead_node(const struct maple_node *node) { - struct maple_node *parent = (void *)((unsigned long) - node->parent & ~MAPLE_NODE_MASK); + struct maple_node *parent; + /* Do not reorder reads from the node prior to the parent check */ + smp_rmb(); + parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); return (parent == node); } + /* * mte_dead_node() - check if the @enode is dead. * @enode: The encoded maple node @@ -555,6 +558,8 @@ static inline bool mte_dead_node(const struct maple_enode *enode) struct maple_node *parent, *node; node = mte_to_node(enode); + /* Do not reorder reads from the node prior to the parent check */ + smp_rmb(); parent = mte_parent(enode); return (parent == node); } @@ -625,6 +630,8 @@ static inline unsigned int mas_alloc_req(const struct ma_state *mas) * @node - the maple node * @type - the node type * + * In the event of a dead node, this array may be %NULL + * * Return: A pointer to the maple node pivots */ static inline unsigned long *ma_pivots(struct maple_node *node, @@ -817,6 +824,11 @@ static inline void *mt_slot(const struct maple_tree *mt, return rcu_dereference_check(slots[offset], mt_locked(mt)); } +static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots, + unsigned char offset) +{ + return rcu_dereference_protected(slots[offset], mt_locked(mt)); +} /* * mas_slot_locked() - Get the slot value when holding the maple tree lock. * @mas: The maple state @@ -828,7 +840,7 @@ static inline void *mt_slot(const struct maple_tree *mt, static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots, unsigned char offset) { - return rcu_dereference_protected(slots[offset], mt_locked(mas->tree)); + return mt_slot_locked(mas->tree, slots, offset); } /* @@ -900,6 +912,45 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, } /* + * mt_clear_meta() - clear the metadata information of a node, if it exists + * @mt: The maple tree + * @mn: The maple node + * @type: The maple node type + * @offset: The offset of the highest sub-gap in this node. + * @end: The end of the data in this node. + */ +static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, + enum maple_type type) +{ + struct maple_metadata *meta; + unsigned long *pivots; + void __rcu **slots; + void *next; + + switch (type) { + case maple_range_64: + pivots = mn->mr64.pivot; + if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { + slots = mn->mr64.slot; + next = mt_slot_locked(mt, slots, + MAPLE_RANGE64_SLOTS - 1); + if (unlikely((mte_to_node(next) && + mte_node_type(next)))) + return; /* no metadata, could be node */ + } + fallthrough; + case maple_arange_64: + meta = ma_meta(mn, type); + break; + default: + return; + } + + meta->gap = 0; + meta->end = 0; +} + +/* * ma_meta_end() - Get the data end of a node from the metadata * @mn: The maple node * @mt: The maple node type @@ -1096,8 +1147,11 @@ static int mas_ascend(struct ma_state *mas) a_type = mas_parent_enum(mas, p_enode); a_node = mte_parent(p_enode); a_slot = mte_parent_slot(p_enode); - pivots = ma_pivots(a_node, a_type); a_enode = mt_mk_node(a_node, a_type); + pivots = ma_pivots(a_node, a_type); + + if (unlikely(ma_dead_node(a_node))) + return 1; if (!set_min && a_slot) { set_min = true; @@ -1249,26 +1303,21 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) node = mas->alloc; node->request_count = 0; while (requested) { - max_req = MAPLE_ALLOC_SLOTS; - if (node->node_count) { - unsigned int offset = node->node_count; - - slots = (void **)&node->slot[offset]; - max_req -= offset; - } else { - slots = (void **)&node->slot; - } - + max_req = MAPLE_ALLOC_SLOTS - node->node_count; + slots = (void **)&node->slot[node->node_count]; max_req = min(requested, max_req); count = mt_alloc_bulk(gfp, max_req, slots); if (!count) goto nomem_bulk; + if (node->node_count == 0) { + node->slot[0]->node_count = 0; + node->slot[0]->request_count = 0; + } + node->node_count += count; allocated += count; node = node->slot[0]; - node->node_count = 0; - node->request_count = 0; requested -= count; } mas->alloc->total = allocated; @@ -1354,12 +1403,16 @@ static inline struct maple_enode *mas_start(struct ma_state *mas) mas->max = ULONG_MAX; mas->depth = 0; +retry: root = mas_root(mas); /* Tree with nodes */ if (likely(xa_is_node(root))) { mas->depth = 1; mas->node = mte_safe_root(root); mas->offset = 0; + if (mte_dead_node(mas->node)) + goto retry; + return NULL; } @@ -1401,6 +1454,9 @@ static inline unsigned char ma_data_end(struct maple_node *node, { unsigned char offset; + if (!pivots) + return 0; + if (type == maple_arange_64) return ma_meta_end(node, type); @@ -1436,6 +1492,9 @@ static inline unsigned char mas_data_end(struct ma_state *mas) return ma_meta_end(node, type); pivots = ma_pivots(node, type); + if (unlikely(ma_dead_node(node))) + return 0; + offset = mt_pivots[type] - 1; if (likely(!pivots[offset])) return ma_meta_end(node, type); @@ -1724,8 +1783,10 @@ static inline void mas_replace(struct ma_state *mas, bool advanced) rcu_assign_pointer(slots[offset], mas->node); } - if (!advanced) + if (!advanced) { + mte_set_node_dead(old_enode); mas_free(mas, old_enode); + } } /* @@ -2251,9 +2312,7 @@ static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode) static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; - unsigned char count; - unsigned char offset; - unsigned long index, min, max; + unsigned char count, offset; if (unlikely(ma_is_dense(wr_mas->type))) { wr_mas->r_max = wr_mas->r_min = mas->index; @@ -2266,34 +2325,12 @@ static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type, wr_mas->pivots, mas->max); offset = mas->offset; - min = mas_safe_min(mas, wr_mas->pivots, offset); - if (unlikely(offset == count)) - goto max; - - max = wr_mas->pivots[offset]; - index = mas->index; - if (unlikely(index <= max)) - goto done; - if (unlikely(!max && offset)) - goto max; + while (offset < count && mas->index > wr_mas->pivots[offset]) + offset++; - min = max + 1; - while (++offset < count) { - max = wr_mas->pivots[offset]; - if (index <= max) - goto done; - else if (unlikely(!max)) - break; - - min = max + 1; - } - -max: - max = mas->max; -done: - wr_mas->r_max = max; - wr_mas->r_min = min; + wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max; + wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); wr_mas->offset_end = mas->offset = offset; } @@ -3221,7 +3258,7 @@ static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end if (tmp < max_p) memset(pivs + tmp, 0, - sizeof(unsigned long *) * (max_p - tmp)); + sizeof(unsigned long) * (max_p - tmp)); if (tmp < mt_slots[mt]) memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); @@ -3659,10 +3696,9 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry) slot++; mas->depth = 1; mas_set_height(mas); - + ma_set_meta(node, maple_leaf_64, 0, slot); /* swap the new root into the tree */ rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); - ma_set_meta(node, maple_leaf_64, 0, slot); return slot; } @@ -3875,18 +3911,13 @@ static inline void *mtree_lookup_walk(struct ma_state *mas) end = ma_data_end(node, type, pivots, max); if (unlikely(ma_dead_node(node))) goto dead_node; - - if (pivots[offset] >= mas->index) - goto next; - do { - offset++; - } while ((offset < end) && (pivots[offset] < mas->index)); - - if (likely(offset > end)) - max = pivots[offset]; + if (pivots[offset] >= mas->index) { + max = pivots[offset]; + break; + } + } while (++offset < end); -next: slots = ma_slots(node, type); next = mt_slot(mas->tree, slots, offset); if (unlikely(ma_dead_node(node))) @@ -4164,6 +4195,7 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas) done: mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end); if (in_rcu) { + mte_set_node_dead(mas->node); mas->node = mt_mk_node(newnode, wr_mas->type); mas_replace(mas, false); } else { @@ -4505,6 +4537,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min) node = mas_mn(mas); slots = ma_slots(node, mt); pivots = ma_pivots(node, mt); + if (unlikely(ma_dead_node(node))) + return 1; + mas->max = pivots[offset]; if (offset) mas->min = pivots[offset - 1] + 1; @@ -4526,6 +4561,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min) slots = ma_slots(node, mt); pivots = ma_pivots(node, mt); offset = ma_data_end(node, mt, pivots, mas->max); + if (unlikely(ma_dead_node(node))) + return 1; + if (offset) mas->min = pivots[offset - 1] + 1; @@ -4574,6 +4612,7 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, struct maple_enode *enode; int level = 0; unsigned char offset; + unsigned char node_end; enum maple_type mt; void __rcu **slots; @@ -4597,7 +4636,11 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, node = mas_mn(mas); mt = mte_node_type(mas->node); pivots = ma_pivots(node, mt); - } while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max))); + node_end = ma_data_end(node, mt, pivots, mas->max); + if (unlikely(ma_dead_node(node))) + return 1; + + } while (unlikely(offset == node_end)); slots = ma_slots(node, mt); pivot = mas_safe_pivot(mas, pivots, ++offset, mt); @@ -4613,6 +4656,9 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, mt = mte_node_type(mas->node); slots = ma_slots(node, mt); pivots = ma_pivots(node, mt); + if (unlikely(ma_dead_node(node))) + return 1; + offset = 0; pivot = pivots[0]; } @@ -4659,11 +4705,14 @@ static inline void *mas_next_nentry(struct ma_state *mas, return NULL; } - pivots = ma_pivots(node, type); slots = ma_slots(node, type); - mas->index = mas_safe_min(mas, pivots, mas->offset); + pivots = ma_pivots(node, type); count = ma_data_end(node, type, pivots, mas->max); - if (ma_dead_node(node)) + if (unlikely(ma_dead_node(node))) + return NULL; + + mas->index = mas_safe_min(mas, pivots, mas->offset); + if (unlikely(ma_dead_node(node))) return NULL; if (mas->index > max) @@ -4817,6 +4866,11 @@ retry: slots = ma_slots(mn, mt); pivots = ma_pivots(mn, mt); + if (unlikely(ma_dead_node(mn))) { + mas_rewalk(mas, index); + goto retry; + } + if (offset == mt_pivots[mt]) pivot = mas->max; else @@ -4887,7 +4941,8 @@ not_found: * Return: True if found in a leaf, false otherwise. * */ -static bool mas_rev_awalk(struct ma_state *mas, unsigned long size) +static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, + unsigned long *gap_min, unsigned long *gap_max) { enum maple_type type = mte_node_type(mas->node); struct maple_node *node = mas_mn(mas); @@ -4952,8 +5007,8 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size) if (unlikely(ma_is_leaf(type))) { mas->offset = offset; - mas->min = min; - mas->max = min + gap - 1; + *gap_min = min; + *gap_max = min + gap - 1; return true; } @@ -4977,10 +5032,10 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) { enum maple_type type = mte_node_type(mas->node); unsigned long pivot, min, gap = 0; - unsigned char offset; - unsigned long *gaps; - unsigned long *pivots = ma_pivots(mas_mn(mas), type); - void __rcu **slots = ma_slots(mas_mn(mas), type); + unsigned char offset, data_end; + unsigned long *gaps, *pivots; + void __rcu **slots; + struct maple_node *node; bool found = false; if (ma_is_dense(type)) { @@ -4988,13 +5043,15 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) return true; } - gaps = ma_gaps(mte_to_node(mas->node), type); + node = mas_mn(mas); + pivots = ma_pivots(node, type); + slots = ma_slots(node, type); + gaps = ma_gaps(node, type); offset = mas->offset; min = mas_safe_min(mas, pivots, offset); - for (; offset < mt_slots[type]; offset++) { - pivot = mas_safe_pivot(mas, pivots, offset, type); - if (offset && !pivot) - break; + data_end = ma_data_end(node, type, pivots, mas->max); + for (; offset <= data_end; offset++) { + pivot = mas_logical_pivot(mas, pivots, offset, type); /* Not within lower bounds */ if (mas->index > pivot) @@ -5099,35 +5156,21 @@ static inline bool mas_rewind_node(struct ma_state *mas) */ static inline bool mas_skip_node(struct ma_state *mas) { - unsigned char slot, slot_count; - unsigned long *pivots; - enum maple_type mt; + if (mas_is_err(mas)) + return false; - mt = mte_node_type(mas->node); - slot_count = mt_slots[mt] - 1; do { if (mte_is_root(mas->node)) { - slot = mas->offset; - if (slot > slot_count) { + if (mas->offset >= mas_data_end(mas)) { mas_set_err(mas, -EBUSY); return false; } } else { mas_ascend(mas); - slot = mas->offset; - mt = mte_node_type(mas->node); - slot_count = mt_slots[mt] - 1; } - } while (slot > slot_count); - - mas->offset = ++slot; - pivots = ma_pivots(mas_mn(mas), mt); - if (slot > 0) - mas->min = pivots[slot - 1] + 1; - - if (slot <= slot_count) - mas->max = pivots[slot]; + } while (mas->offset >= mas_data_end(mas)); + mas->offset++; return true; } @@ -5207,25 +5250,28 @@ static inline void mas_fill_gap(struct ma_state *mas, void *entry, * @size: The size of the gap * @fwd: Searching forward or back */ -static inline void mas_sparse_area(struct ma_state *mas, unsigned long min, +static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size, bool fwd) { - unsigned long start = 0; - - if (!unlikely(mas_is_none(mas))) - start++; + if (!unlikely(mas_is_none(mas)) && min == 0) { + min++; + /* + * At this time, min is increased, we need to recheck whether + * the size is satisfied. + */ + if (min > max || max - min + 1 < size) + return -EBUSY; + } /* mas_is_ptr */ - if (start < min) - start = min; - if (fwd) { - mas->index = start; - mas->last = start + size - 1; - return; + mas->index = min; + mas->last = min + size - 1; + } else { + mas->last = max; + mas->index = max - size + 1; } - - mas->index = max; + return 0; } /* @@ -5243,6 +5289,9 @@ int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long *pivots; enum maple_type mt; + if (min >= max) + return -EINVAL; + if (mas_is_start(mas)) mas_start(mas); else if (mas->offset >= 2) @@ -5251,10 +5300,8 @@ int mas_empty_area(struct ma_state *mas, unsigned long min, return -EBUSY; /* Empty set */ - if (mas_is_none(mas) || mas_is_ptr(mas)) { - mas_sparse_area(mas, min, max, size, true); - return 0; - } + if (mas_is_none(mas) || mas_is_ptr(mas)) + return mas_sparse_area(mas, min, max, size, true); /* The start of the window can only be within these values */ mas->index = min; @@ -5297,6 +5344,9 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, { struct maple_enode *last = mas->node; + if (min >= max) + return -EINVAL; + if (mas_is_start(mas)) { mas_start(mas); mas->offset = mas_data_end(mas); @@ -5307,16 +5357,14 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, } /* Empty set. */ - if (mas_is_none(mas) || mas_is_ptr(mas)) { - mas_sparse_area(mas, min, max, size, false); - return 0; - } + if (mas_is_none(mas) || mas_is_ptr(mas)) + return mas_sparse_area(mas, min, max, size, false); /* The start of the window can only be within these values. */ mas->index = min; mas->last = max; - while (!mas_rev_awalk(mas, size)) { + while (!mas_rev_awalk(mas, size, &min, &max)) { if (last == mas->node) { if (!mas_rewind_node(mas)) return -EBUSY; @@ -5331,17 +5379,9 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) return -EBUSY; - /* - * mas_rev_awalk() has set mas->min and mas->max to the gap values. If - * the maximum is outside the window we are searching, then use the last - * location in the search. - * mas->max and mas->min is the range of the gap. - * mas->index and mas->last are currently set to the search range. - */ - /* Trim the upper limit to the max. */ - if (mas->max <= mas->last) - mas->last = mas->max; + if (max <= mas->last) + mas->last = max; mas->index = mas->last - size + 1; return 0; @@ -5414,24 +5454,26 @@ no_gap: } /* - * mas_dead_leaves() - Mark all leaves of a node as dead. + * mte_dead_leaves() - Mark all leaves of a node as dead. * @mas: The maple state * @slots: Pointer to the slot array + * @type: The maple node type * * Must hold the write lock. * * Return: The number of leaves marked as dead. */ static inline -unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots) +unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, + void __rcu **slots) { struct maple_node *node; enum maple_type type; void *entry; int offset; - for (offset = 0; offset < mt_slot_count(mas->node); offset++) { - entry = mas_slot_locked(mas, slots, offset); + for (offset = 0; offset < mt_slot_count(enode); offset++) { + entry = mt_slot(mt, slots, offset); type = mte_node_type(entry); node = mte_to_node(entry); /* Use both node and type to catch LE & BE metadata */ @@ -5439,7 +5481,6 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots) break; mte_set_node_dead(entry); - smp_wmb(); /* Needed for RCU */ node->type = type; rcu_assign_pointer(slots[offset], node); } @@ -5447,151 +5488,160 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots) return offset; } -static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset) +/** + * mte_dead_walk() - Walk down a dead tree to just before the leaves + * @enode: The maple encoded node + * @offset: The starting offset + * + * Note: This can only be used from the RCU callback context. + */ +static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) { struct maple_node *node, *next; void __rcu **slots = NULL; - next = mas_mn(mas); + next = mte_to_node(*enode); do { - mas->node = ma_enode_ptr(next); - node = mas_mn(mas); + *enode = ma_enode_ptr(next); + node = mte_to_node(*enode); slots = ma_slots(node, node->type); - next = mas_slot_locked(mas, slots, offset); + next = rcu_dereference_protected(slots[offset], + lock_is_held(&rcu_callback_map)); offset = 0; } while (!ma_is_leaf(next->type)); return slots; } +/** + * mt_free_walk() - Walk & free a tree in the RCU callback context + * @head: The RCU head that's within the node. + * + * Note: This can only be used from the RCU callback context. + */ static void mt_free_walk(struct rcu_head *head) { void __rcu **slots; struct maple_node *node, *start; - struct maple_tree mt; + struct maple_enode *enode; unsigned char offset; enum maple_type type; - MA_STATE(mas, &mt, 0, 0); node = container_of(head, struct maple_node, rcu); if (ma_is_leaf(node->type)) goto free_leaf; - mt_init_flags(&mt, node->ma_flags); - mas_lock(&mas); start = node; - mas.node = mt_mk_node(node, node->type); - slots = mas_dead_walk(&mas, 0); - node = mas_mn(&mas); + enode = mt_mk_node(node, node->type); + slots = mte_dead_walk(&enode, 0); + node = mte_to_node(enode); do { mt_free_bulk(node->slot_len, slots); offset = node->parent_slot + 1; - mas.node = node->piv_parent; - if (mas_mn(&mas) == node) - goto start_slots_free; - - type = mte_node_type(mas.node); - slots = ma_slots(mte_to_node(mas.node), type); - if ((offset < mt_slots[type]) && (slots[offset])) - slots = mas_dead_walk(&mas, offset); - - node = mas_mn(&mas); + enode = node->piv_parent; + if (mte_to_node(enode) == node) + goto free_leaf; + + type = mte_node_type(enode); + slots = ma_slots(mte_to_node(enode), type); + if ((offset < mt_slots[type]) && + rcu_dereference_protected(slots[offset], + lock_is_held(&rcu_callback_map))) + slots = mte_dead_walk(&enode, offset); + node = mte_to_node(enode); } while ((node != start) || (node->slot_len < offset)); slots = ma_slots(node, node->type); mt_free_bulk(node->slot_len, slots); -start_slots_free: - mas_unlock(&mas); free_leaf: mt_free_rcu(&node->rcu); } -static inline void __rcu **mas_destroy_descend(struct ma_state *mas, - struct maple_enode *prev, unsigned char offset) +static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, + struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) { struct maple_node *node; - struct maple_enode *next = mas->node; + struct maple_enode *next = *enode; void __rcu **slots = NULL; + enum maple_type type; + unsigned char next_offset = 0; do { - mas->node = next; - node = mas_mn(mas); - slots = ma_slots(node, mte_node_type(mas->node)); - next = mas_slot_locked(mas, slots, 0); + *enode = next; + node = mte_to_node(*enode); + type = mte_node_type(*enode); + slots = ma_slots(node, type); + next = mt_slot_locked(mt, slots, next_offset); if ((mte_dead_node(next))) - next = mas_slot_locked(mas, slots, 1); + next = mt_slot_locked(mt, slots, ++next_offset); - mte_set_node_dead(mas->node); - node->type = mte_node_type(mas->node); + mte_set_node_dead(*enode); + node->type = type; node->piv_parent = prev; node->parent_slot = offset; - offset = 0; - prev = mas->node; + offset = next_offset; + next_offset = 0; + prev = *enode; } while (!mte_is_leaf(next)); return slots; } -static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags, +static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, bool free) { void __rcu **slots; struct maple_node *node = mte_to_node(enode); struct maple_enode *start; - struct maple_tree mt; - - MA_STATE(mas, &mt, 0, 0); - if (mte_is_leaf(enode)) + if (mte_is_leaf(enode)) { + node->type = mte_node_type(enode); goto free_leaf; + } - mt_init_flags(&mt, ma_flags); - mas_lock(&mas); - - mas.node = start = enode; - slots = mas_destroy_descend(&mas, start, 0); - node = mas_mn(&mas); + start = enode; + slots = mte_destroy_descend(&enode, mt, start, 0); + node = mte_to_node(enode); // Updated in the above call. do { enum maple_type type; unsigned char offset; struct maple_enode *parent, *tmp; - node->slot_len = mas_dead_leaves(&mas, slots); + node->slot_len = mte_dead_leaves(enode, mt, slots); if (free) mt_free_bulk(node->slot_len, slots); offset = node->parent_slot + 1; - mas.node = node->piv_parent; - if (mas_mn(&mas) == node) - goto start_slots_free; + enode = node->piv_parent; + if (mte_to_node(enode) == node) + goto free_leaf; - type = mte_node_type(mas.node); - slots = ma_slots(mte_to_node(mas.node), type); + type = mte_node_type(enode); + slots = ma_slots(mte_to_node(enode), type); if (offset >= mt_slots[type]) goto next; - tmp = mas_slot_locked(&mas, slots, offset); + tmp = mt_slot_locked(mt, slots, offset); if (mte_node_type(tmp) && mte_to_node(tmp)) { - parent = mas.node; - mas.node = tmp; - slots = mas_destroy_descend(&mas, parent, offset); + parent = enode; + enode = tmp; + slots = mte_destroy_descend(&enode, mt, parent, offset); } next: - node = mas_mn(&mas); - } while (start != mas.node); + node = mte_to_node(enode); + } while (start != enode); - node = mas_mn(&mas); - node->slot_len = mas_dead_leaves(&mas, slots); + node = mte_to_node(enode); + node->slot_len = mte_dead_leaves(enode, mt, slots); if (free) mt_free_bulk(node->slot_len, slots); -start_slots_free: - mas_unlock(&mas); - free_leaf: if (free) mt_free_rcu(&node->rcu); + else + mt_clear_meta(mt, node, node->type); } /* @@ -5607,10 +5657,10 @@ static inline void mte_destroy_walk(struct maple_enode *enode, struct maple_node *node = mte_to_node(enode); if (mt_in_rcu(mt)) { - mt_destroy_walk(enode, mt->ma_flags, false); + mt_destroy_walk(enode, mt, false); call_rcu(&node->rcu, mt_free_walk); } else { - mt_destroy_walk(enode, mt->ma_flags, true); + mt_destroy_walk(enode, mt, true); } } @@ -5740,6 +5790,7 @@ int mas_preallocate(struct ma_state *mas, gfp_t gfp) mas_reset(mas); return ret; } +EXPORT_SYMBOL_GPL(mas_preallocate); /* * mas_destroy() - destroy a maple state. @@ -6631,11 +6682,11 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn, while (likely(!ma_is_leaf(mt))) { MT_BUG_ON(mas->tree, mte_dead_node(mas->node)); slots = ma_slots(mn, mt); - pivots = ma_pivots(mn, mt); - max = pivots[0]; entry = mas_slot(mas, slots, 0); + pivots = ma_pivots(mn, mt); if (unlikely(ma_dead_node(mn))) return NULL; + max = pivots[0]; mas->node = entry; mn = mas_mn(mas); mt = mte_node_type(mas->node); @@ -6655,13 +6706,13 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn, if (likely(entry)) return entry; - pivots = ma_pivots(mn, mt); - mas->index = pivots[0] + 1; mas->offset = 1; entry = mas_slot(mas, slots, 1); + pivots = ma_pivots(mn, mt); if (unlikely(ma_dead_node(mn))) return NULL; + mas->index = pivots[0] + 1; if (mas->index > limit) goto none; diff --git a/lib/packing.c b/lib/packing.c index a96169237ae6..3f656167c17e 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -198,5 +198,4 @@ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, } EXPORT_SYMBOL(packing); -MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index dba56c5c1837..5004463c4f9f 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -122,8 +122,19 @@ void percpu_counter_sync(struct percpu_counter *fbc) } EXPORT_SYMBOL(percpu_counter_sync); -static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, - const struct cpumask *cpu_mask) +/* + * Add up all the per-cpu counts, return the result. This is a more accurate + * but much slower version of percpu_counter_read_positive(). + * + * We use the cpu mask of (cpu_online_mask | cpu_dying_mask) to capture sums + * from CPUs that are in the process of being taken offline. Dying cpus have + * been removed from the online mask, but may not have had the hotplug dead + * notifier called to fold the percpu count back into the global counter sum. + * By including dying CPUs in the iteration mask, we avoid this race condition + * so __percpu_counter_sum() just does the right thing when CPUs are being taken + * offline. + */ +s64 __percpu_counter_sum(struct percpu_counter *fbc) { s64 ret; int cpu; @@ -131,35 +142,15 @@ static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, raw_spin_lock_irqsave(&fbc->lock, flags); ret = fbc->count; - for_each_cpu(cpu, cpu_mask) { + for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } raw_spin_unlock_irqrestore(&fbc->lock, flags); return ret; } - -/* - * Add up all the per-cpu counts, return the result. This is a more accurate - * but much slower version of percpu_counter_read_positive() - */ -s64 __percpu_counter_sum(struct percpu_counter *fbc) -{ - return __percpu_counter_sum_mask(fbc, cpu_online_mask); -} EXPORT_SYMBOL(__percpu_counter_sum); -/* - * This is slower version of percpu_counter_sum as it traverses all possible - * cpus. Use this only in the cases where accurate data is needed in the - * presense of CPUs getting offlined. - */ -s64 percpu_counter_sum_all(struct percpu_counter *fbc) -{ - return __percpu_counter_sum_mask(fbc, cpu_possible_mask); -} -EXPORT_SYMBOL(percpu_counter_sum_all); - int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key) { diff --git a/lib/pldmfw/pldmfw.c b/lib/pldmfw/pldmfw.c index 6e77eb6d8e72..54e1809a38fd 100644 --- a/lib/pldmfw/pldmfw.c +++ b/lib/pldmfw/pldmfw.c @@ -875,5 +875,4 @@ out_release_data: EXPORT_SYMBOL(pldmfw_flash_image); MODULE_AUTHOR("Jacob Keller <jacob.e.keller@intel.com>"); -MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("PLDM firmware flash update library"); diff --git a/lib/rbtree.c b/lib/rbtree.c index c4ac5c2421f2..5114eda6309c 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -58,7 +58,7 @@ static inline void rb_set_black(struct rb_node *rb) { - rb->__rb_parent_color |= RB_BLACK; + rb->__rb_parent_color += RB_BLACK; } static inline struct rb_node *rb_red_parent(struct rb_node *red) diff --git a/lib/rcuref.c b/lib/rcuref.c new file mode 100644 index 000000000000..5ec00a4a64d1 --- /dev/null +++ b/lib/rcuref.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * rcuref - A scalable reference count implementation for RCU managed objects + * + * rcuref is provided to replace open coded reference count implementations + * based on atomic_t. It protects explicitely RCU managed objects which can + * be visible even after the last reference has been dropped and the object + * is heading towards destruction. + * + * A common usage pattern is: + * + * get() + * rcu_read_lock(); + * p = get_ptr(); + * if (p && !atomic_inc_not_zero(&p->refcnt)) + * p = NULL; + * rcu_read_unlock(); + * return p; + * + * put() + * if (!atomic_dec_return(&->refcnt)) { + * remove_ptr(p); + * kfree_rcu((p, rcu); + * } + * + * atomic_inc_not_zero() is implemented with a try_cmpxchg() loop which has + * O(N^2) behaviour under contention with N concurrent operations. + * + * rcuref uses atomic_add_negative_relaxed() for the fast path, which scales + * better under contention. + * + * Why not refcount? + * ================= + * + * In principle it should be possible to make refcount use the rcuref + * scheme, but the destruction race described below cannot be prevented + * unless the protected object is RCU managed. + * + * Theory of operation + * =================== + * + * rcuref uses an unsigned integer reference counter. As long as the + * counter value is greater than or equal to RCUREF_ONEREF and not larger + * than RCUREF_MAXREF the reference is alive: + * + * ONEREF MAXREF SATURATED RELEASED DEAD NOREF + * 0 0x7FFFFFFF 0x8000000 0xA0000000 0xBFFFFFFF 0xC0000000 0xE0000000 0xFFFFFFFF + * <---valid --------> <-------saturation zone-------> <-----dead zone-----> + * + * The get() and put() operations do unconditional increments and + * decrements. The result is checked after the operation. This optimizes + * for the fast path. + * + * If the reference count is saturated or dead, then the increments and + * decrements are not harmful as the reference count still stays in the + * respective zones and is always set back to STATURATED resp. DEAD. The + * zones have room for 2^28 racing operations in each direction, which + * makes it practically impossible to escape the zones. + * + * Once the last reference is dropped the reference count becomes + * RCUREF_NOREF which forces rcuref_put() into the slowpath operation. The + * slowpath then tries to set the reference count from RCUREF_NOREF to + * RCUREF_DEAD via a cmpxchg(). This opens a small window where a + * concurrent rcuref_get() can acquire the reference count and bring it + * back to RCUREF_ONEREF or even drop the reference again and mark it DEAD. + * + * If the cmpxchg() succeeds then a concurrent rcuref_get() will result in + * DEAD + 1, which is inside the dead zone. If that happens the reference + * count is put back to DEAD. + * + * The actual race is possible due to the unconditional increment and + * decrements in rcuref_get() and rcuref_put(): + * + * T1 T2 + * get() put() + * if (atomic_add_negative(-1, &ref->refcnt)) + * succeeds-> atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); + * + * atomic_add_negative(1, &ref->refcnt); <- Elevates refcount to DEAD + 1 + * + * As the result of T1's add is negative, the get() goes into the slow path + * and observes refcnt being in the dead zone which makes the operation fail. + * + * Possible critical states: + * + * Context Counter References Operation + * T1 0 1 init() + * T2 1 2 get() + * T1 0 1 put() + * T2 -1 0 put() tries to mark dead + * T1 0 1 get() + * T2 0 1 put() mark dead fails + * T1 -1 0 put() tries to mark dead + * T1 DEAD 0 put() mark dead succeeds + * T2 DEAD+1 0 get() fails and puts it back to DEAD + * + * Of course there are more complex scenarios, but the above illustrates + * the working principle. The rest is left to the imagination of the + * reader. + * + * Deconstruction race + * =================== + * + * The release operation must be protected by prohibiting a grace period in + * order to prevent a possible use after free: + * + * T1 T2 + * put() get() + * // ref->refcnt = ONEREF + * if (!atomic_add_negative(-1, &ref->refcnt)) + * return false; <- Not taken + * + * // ref->refcnt == NOREF + * --> preemption + * // Elevates ref->refcnt to ONEREF + * if (!atomic_add_negative(1, &ref->refcnt)) + * return true; <- taken + * + * if (put(&p->ref)) { <-- Succeeds + * remove_pointer(p); + * kfree_rcu(p, rcu); + * } + * + * RCU grace period ends, object is freed + * + * atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); <- UAF + * + * This is prevented by disabling preemption around the put() operation as + * that's in most kernel configurations cheaper than a rcu_read_lock() / + * rcu_read_unlock() pair and in many cases even a NOOP. In any case it + * prevents the grace period which keeps the object alive until all put() + * operations complete. + * + * Saturation protection + * ===================== + * + * The reference count has a saturation limit RCUREF_MAXREF (INT_MAX). + * Once this is exceedded the reference count becomes stale by setting it + * to RCUREF_SATURATED, which will cause a memory leak, but it prevents + * wrap arounds which obviously cause worse problems than a memory + * leak. When saturation is reached a warning is emitted. + * + * Race conditions + * =============== + * + * All reference count increment/decrement operations are unconditional and + * only verified after the fact. This optimizes for the good case and takes + * the occasional race vs. a dead or already saturated refcount into + * account. The saturation and dead zones are large enough to accomodate + * for that. + * + * Memory ordering + * =============== + * + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions + * and provide only what is strictly required for refcounts. + * + * The increments are fully relaxed; these will not provide ordering. The + * rationale is that whatever is used to obtain the object to increase the + * reference count on will provide the ordering. For locked data + * structures, its the lock acquire, for RCU/lockless data structures its + * the dependent load. + * + * rcuref_get() provides a control dependency ordering future stores which + * ensures that the object is not modified when acquiring a reference + * fails. + * + * rcuref_put() provides release order, i.e. all prior loads and stores + * will be issued before. It also provides a control dependency ordering + * against the subsequent destruction of the object. + * + * If rcuref_put() successfully dropped the last reference and marked the + * object DEAD it also provides acquire ordering. + */ + +#include <linux/export.h> +#include <linux/rcuref.h> + +/** + * rcuref_get_slowpath - Slowpath of rcuref_get() + * @ref: Pointer to the reference count + * + * Invoked when the reference count is outside of the valid zone. + * + * Return: + * False if the reference count was already marked dead + * + * True if the reference count is saturated, which prevents the + * object from being deconstructed ever. + */ +bool rcuref_get_slowpath(rcuref_t *ref) +{ + unsigned int cnt = atomic_read(&ref->refcnt); + + /* + * If the reference count was already marked dead, undo the + * increment so it stays in the middle of the dead zone and return + * fail. + */ + if (cnt >= RCUREF_RELEASED) { + atomic_set(&ref->refcnt, RCUREF_DEAD); + return false; + } + + /* + * If it was saturated, warn and mark it so. In case the increment + * was already on a saturated value restore the saturation + * marker. This keeps it in the middle of the saturation zone and + * prevents the reference count from overflowing. This leaks the + * object memory, but prevents the obvious reference count overflow + * damage. + */ + if (WARN_ONCE(cnt > RCUREF_MAXREF, "rcuref saturated - leaking memory")) + atomic_set(&ref->refcnt, RCUREF_SATURATED); + return true; +} +EXPORT_SYMBOL_GPL(rcuref_get_slowpath); + +/** + * rcuref_put_slowpath - Slowpath of __rcuref_put() + * @ref: Pointer to the reference count + * + * Invoked when the reference count is outside of the valid zone. + * + * Return: + * True if this was the last reference with no future references + * possible. This signals the caller that it can safely schedule the + * object, which is protected by the reference counter, for + * deconstruction. + * + * False if there are still active references or the put() raced + * with a concurrent get()/put() pair. Caller is not allowed to + * deconstruct the protected object. + */ +bool rcuref_put_slowpath(rcuref_t *ref) +{ + unsigned int cnt = atomic_read(&ref->refcnt); + + /* Did this drop the last reference? */ + if (likely(cnt == RCUREF_NOREF)) { + /* + * Carefully try to set the reference count to RCUREF_DEAD. + * + * This can fail if a concurrent get() operation has + * elevated it again or the corresponding put() even marked + * it dead already. Both are valid situations and do not + * require a retry. If this fails the caller is not + * allowed to deconstruct the object. + */ + if (atomic_cmpxchg_release(&ref->refcnt, RCUREF_NOREF, RCUREF_DEAD) != RCUREF_NOREF) + return false; + + /* + * The caller can safely schedule the object for + * deconstruction. Provide acquire ordering. + */ + smp_acquire__after_ctrl_dep(); + return true; + } + + /* + * If the reference count was already in the dead zone, then this + * put() operation is imbalanced. Warn, put the reference count back to + * DEAD and tell the caller to not deconstruct the object. + */ + if (WARN_ONCE(cnt >= RCUREF_RELEASED, "rcuref - imbalanced put()")) { + atomic_set(&ref->refcnt, RCUREF_DEAD); + return false; + } + + /* + * This is a put() operation on a saturated refcount. Restore the + * mean saturation value and tell the caller to not deconstruct the + * object. + */ + if (cnt > RCUREF_MAXREF) + atomic_set(&ref->refcnt, RCUREF_SATURATED); + return false; +} +EXPORT_SYMBOL_GPL(rcuref_put_slowpath); diff --git a/lib/show_mem.c b/lib/show_mem.c index 0d7585cde2a6..1485c87be935 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -10,26 +10,19 @@ void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) { - pg_data_t *pgdat; unsigned long total = 0, reserved = 0, highmem = 0; + struct zone *zone; printk("Mem-Info:\n"); __show_free_areas(filter, nodemask, max_zone_idx); - for_each_online_pgdat(pgdat) { - int zoneid; + for_each_populated_zone(zone) { - for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { - struct zone *zone = &pgdat->node_zones[zoneid]; - if (!populated_zone(zone)) - continue; + total += zone->present_pages; + reserved += zone->present_pages - zone_managed_pages(zone); - total += zone->present_pages; - reserved += zone->present_pages - zone_managed_pages(zone); - - if (is_highmem_idx(zoneid)) - highmem += zone->present_pages; - } + if (is_highmem(zone)) + highmem += zone->present_pages; } printk("%lu pages RAM\n", total); diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 036da8e295d1..2f5aa851834e 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -17,6 +17,7 @@ #include <linux/gfp.h> #include <linux/jhash.h> #include <linux/kernel.h> +#include <linux/kmsan.h> #include <linux/mm.h> #include <linux/mutex.h> #include <linux/percpu.h> @@ -306,6 +307,11 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) stack->handle.extra = 0; memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); pool_offset += required_size; + /* + * Let KMSAN know the stored stack record is initialized. This shall + * prevent false positive reports if instrumented code accesses it. + */ + kmsan_unpoison_memory(stack, required_size); return stack; } @@ -465,6 +471,12 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, struct stack_record *stack; *entries = NULL; + /* + * Let KMSAN know *entries is initialized. This shall prevent false + * positive reports if instrumented code accesses it. + */ + kmsan_unpoison_memory(entries, sizeof(*entries)); + if (!handle) return 0; diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c index 41d3447bc3b4..9a68849a5d55 100644 --- a/lib/test-string_helpers.c +++ b/lib/test-string_helpers.c @@ -587,7 +587,7 @@ static int __init test_string_helpers_init(void) for (i = 0; i < UNESCAPE_ALL_MASK + 1; i++) test_string_unescape("unescape", i, false); test_string_unescape("unescape inplace", - get_random_u32_below(UNESCAPE_ANY + 1), true); + get_random_u32_below(UNESCAPE_ALL_MASK + 1), true); /* Without dictionary */ for (i = 0; i < ESCAPE_ALL_MASK + 1; i++) diff --git a/lib/test_fprobe.c b/lib/test_fprobe.c index 0fe5273e960b..079435a2e26c 100644 --- a/lib/test_fprobe.c +++ b/lib/test_fprobe.c @@ -269,4 +269,3 @@ static struct kunit_suite fprobe_test_suite = { kunit_test_suites(&fprobe_test_suite); -MODULE_LICENSE("GPL"); diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 3d19b1f78d71..f1db333270e9 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -2670,6 +2670,49 @@ static noinline void check_empty_area_window(struct maple_tree *mt) rcu_read_unlock(); } +static noinline void check_empty_area_fill(struct maple_tree *mt) +{ + const unsigned long max = 0x25D78000; + unsigned long size; + int loop, shift; + MA_STATE(mas, mt, 0, 0); + + mt_set_non_kernel(99999); + for (shift = 12; shift <= 16; shift++) { + loop = 5000; + size = 1 << shift; + while (loop--) { + mas_set(&mas, 0); + mas_lock(&mas); + MT_BUG_ON(mt, mas_empty_area(&mas, 0, max, size) != 0); + MT_BUG_ON(mt, mas.last != mas.index + size - 1); + mas_store_gfp(&mas, (void *)size, GFP_KERNEL); + mas_unlock(&mas); + mas_reset(&mas); + } + } + + /* No space left. */ + size = 0x1000; + rcu_read_lock(); + MT_BUG_ON(mt, mas_empty_area(&mas, 0, max, size) != -EBUSY); + rcu_read_unlock(); + + /* Fill a depth 3 node to the maximum */ + for (unsigned long i = 629440511; i <= 629440800; i += 6) + mtree_store_range(mt, i, i + 5, (void *)i, GFP_KERNEL); + /* Make space in the second-last depth 4 node */ + mtree_erase(mt, 631668735); + /* Make space in the last depth 4 node */ + mtree_erase(mt, 629506047); + mas_reset(&mas); + /* Search from just after the gap in the second-last depth 4 */ + rcu_read_lock(); + MT_BUG_ON(mt, mas_empty_area(&mas, 629506048, 690000000, 0x5000) != 0); + rcu_read_unlock(); + mt_set_non_kernel(0); +} + static DEFINE_MTREE(tree); static int maple_tree_seed(void) { @@ -2926,6 +2969,11 @@ static int maple_tree_seed(void) check_empty_area_window(&tree); mtree_destroy(&tree); + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_empty_area_fill(&tree); + mtree_destroy(&tree); + + #if defined(BENCH) skip: #endif diff --git a/lib/test_printf.c b/lib/test_printf.c index 46b4e6c414a3..7677ebccf3c3 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -642,12 +642,26 @@ page_flags_test(int section, int node, int zone, int last_cpupid, test(cmp_buf, "%pGp", &flags); } +static void __init page_type_test(unsigned int page_type, const char *name, + char *cmp_buf) +{ + unsigned long size; + + size = scnprintf(cmp_buf, BUF_SIZE, "%#x(", page_type); + if (page_type_has_type(page_type)) + size += scnprintf(cmp_buf + size, BUF_SIZE - size, "%s", name); + + snprintf(cmp_buf + size, BUF_SIZE - size, ")"); + test(cmp_buf, "%pGt", &page_type); +} + static void __init flags(void) { unsigned long flags; char *cmp_buffer; gfp_t gfp; + unsigned int page_type; cmp_buffer = kmalloc(BUF_SIZE, GFP_KERNEL); if (!cmp_buffer) @@ -687,6 +701,18 @@ flags(void) gfp |= __GFP_HIGH; test(cmp_buffer, "%pGg", &gfp); + page_type = ~0; + page_type_test(page_type, "", cmp_buffer); + + page_type = 10; + page_type_test(page_type, "", cmp_buffer); + + page_type = ~PG_buddy; + page_type_test(page_type, "buddy", cmp_buffer); + + page_type = ~(PG_table | PG_buddy); + page_type_test(page_type, "table|buddy", cmp_buffer); + kfree(cmp_buffer); } diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index de4ee0d50906..9dd9745d365f 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -53,6 +53,7 @@ __param(int, run_test_mask, INT_MAX, "\t\tid: 128, name: pcpu_alloc_test\n" "\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n" "\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n" + "\t\tid: 1024, name: vm_map_ram_test\n" /* Add a new test case description here. */ ); @@ -334,7 +335,7 @@ kvfree_rcu_1_arg_vmalloc_test(void) return -1; p->array[0] = 'a'; - kvfree_rcu(p); + kvfree_rcu_mightsleep(p); } return 0; @@ -358,6 +359,41 @@ kvfree_rcu_2_arg_vmalloc_test(void) return 0; } +static int +vm_map_ram_test(void) +{ + unsigned long nr_allocated; + unsigned int map_nr_pages; + unsigned char *v_ptr; + struct page **pages; + int i; + + map_nr_pages = nr_pages > 0 ? nr_pages:1; + pages = kmalloc(map_nr_pages * sizeof(struct page), GFP_KERNEL); + if (!pages) + return -1; + + nr_allocated = alloc_pages_bulk_array(GFP_KERNEL, map_nr_pages, pages); + if (nr_allocated != map_nr_pages) + goto cleanup; + + /* Run the test loop. */ + for (i = 0; i < test_loop_count; i++) { + v_ptr = vm_map_ram(pages, map_nr_pages, NUMA_NO_NODE); + *v_ptr = 'a'; + vm_unmap_ram(v_ptr, map_nr_pages); + } + +cleanup: + for (i = 0; i < nr_allocated; i++) + __free_page(pages[i]); + + kfree(pages); + + /* 0 indicates success. */ + return nr_allocated != map_nr_pages; +} + struct test_case_desc { const char *test_name; int (*test_func)(void); @@ -374,6 +410,7 @@ static struct test_case_desc test_case_array[] = { { "pcpu_alloc_test", pcpu_alloc_test }, { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test }, { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test }, + { "vm_map_ram_test", vm_map_ram_test }, /* Add a new test case here. */ }; diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile index e814061d6aa0..9f031eafc465 100644 --- a/lib/vdso/Makefile +++ b/lib/vdso/Makefile @@ -5,18 +5,13 @@ GENERIC_VDSO_DIR := $(dir $(GENERIC_VDSO_MK_PATH)) c-gettimeofday-$(CONFIG_GENERIC_GETTIMEOFDAY) := $(addprefix $(GENERIC_VDSO_DIR), gettimeofday.c) -# This cmd checks that the vdso library does not contain absolute relocation +# This cmd checks that the vdso library does not contain dynamic relocations. # It has to be called after the linking of the vdso library and requires it # as a parameter. # -# $(ARCH_REL_TYPE_ABS) is defined in the arch specific makefile and corresponds -# to the absolute relocation types printed by "objdump -R" and accepted by the -# dynamic linker. -ifndef ARCH_REL_TYPE_ABS -$(error ARCH_REL_TYPE_ABS is not set) -endif - +# As a workaround for some GNU ld ports which produce unneeded R_*_NONE +# dynamic relocations, ignore R_*_NONE. quiet_cmd_vdso_check = VDSOCHK $@ - cmd_vdso_check = if $(OBJDUMP) -R $@ | grep -E -h "$(ARCH_REL_TYPE_ABS)"; \ + cmd_vdso_check = if $(READELF) -rW $@ | grep -v _NONE | grep -q " R_\w*_"; \ then (echo >&2 "$@: dynamic relocations are not supported"; \ rm -f $@; /bin/false); fi diff --git a/lib/vsprintf.c b/lib/vsprintf.c index be71a03c936a..40f560959b16 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2052,6 +2052,25 @@ char *format_page_flags(char *buf, char *end, unsigned long flags) return buf; } +static +char *format_page_type(char *buf, char *end, unsigned int page_type) +{ + buf = number(buf, end, page_type, default_flag_spec); + + if (buf < end) + *buf = '('; + buf++; + + if (page_type_has_type(page_type)) + buf = format_flags(buf, end, ~page_type, pagetype_names); + + if (buf < end) + *buf = ')'; + buf++; + + return buf; +} + static noinline_for_stack char *flags_string(char *buf, char *end, void *flags_ptr, struct printf_spec spec, const char *fmt) @@ -2065,6 +2084,8 @@ char *flags_string(char *buf, char *end, void *flags_ptr, switch (fmt[1]) { case 'p': return format_page_flags(buf, end, *(unsigned long *)flags_ptr); + case 't': + return format_page_type(buf, end, *(unsigned int *)flags_ptr); case 'v': flags = *(unsigned long *)flags_ptr; names = vmaflag_names; @@ -3621,7 +3642,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args) if (!digit || (base == 16 && !isxdigit(digit)) || (base == 10 && !isdigit(digit)) - || (base == 8 && (!isdigit(digit) || digit > '7')) + || (base == 8 && !isodigit(digit)) || (base == 0 && !isdigit(digit))) break; |