From 58e698af4c6347c726090d5480b2e51d1d07edf9 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 17 Mar 2016 14:18:36 -0700 Subject: radix-tree: account radix_tree_node to memory cgroup Allocation of radix_tree_node objects can be easily triggered from userspace, so we should account them to memory cgroup. Besides, we need them accounted for making shadow node shrinker per memcg (see mm/workingset.c). A tricky thing about accounting radix_tree_node objects is that they are mostly allocated through radix_tree_preload(), so we can't just set SLAB_ACCOUNT for radix_tree_node_cachep - that would likely result in a lot of unrelated cgroups using objects from each other's caches. One way to overcome this would be making radix tree preloads per memcg, but that would probably look cumbersome and overcomplicated. Instead, we make radix_tree_node_alloc() first try to allocate from the cache with __GFP_ACCOUNT, no matter if the caller has preloaded or not, and only if it fails fall back on using per cpu preloads. This should make most allocations accounted. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 6b79e9026e24..224b369f5a5e 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -191,6 +191,15 @@ radix_tree_node_alloc(struct radix_tree_root *root) if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { struct radix_tree_preload *rtp; + /* + * Even if the caller has preloaded, try to allocate from the + * cache first for the new node to get accounted. + */ + ret = kmem_cache_alloc(radix_tree_node_cachep, + gfp_mask | __GFP_ACCOUNT | __GFP_NOWARN); + if (ret) + goto out; + /* * Provided the caller has preloaded here, we will always * succeed in getting a node here (and never reach @@ -208,10 +217,11 @@ radix_tree_node_alloc(struct radix_tree_root *root) * for debugging. */ kmemleak_update_trace(ret); + goto out; } - if (ret == NULL) - ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); - + ret = kmem_cache_alloc(radix_tree_node_cachep, + gfp_mask | __GFP_ACCOUNT); +out: BUG_ON(radix_tree_is_indirect_ptr(ret)); return ret; } -- cgit v1.2.3 From d7b85cab74edef84a9330c476478ba8cd732b6a9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 17 Mar 2016 14:21:38 -0700 Subject: lib/bug.c: make panic_on_warn available for all architectures Christian Borntraeger reported that panic_on_warn doesn't have any effect on s390. The panic_on_warn feature was introduced with 9e3961a09798 ("kernel: add panic_on_warn"). However it did care only for the case when WANT_WARN_ON_SLOWPATH is defined. This is turn is only the case for architectures which do not have an own __WARN_TAINT defined. Other architectures which do have __WARN_TAINT defined call report_bug() for warnings within lib/bug.c which does not call panic() in case panic_on_warn is set. Let's simply enable the panic_on_warn feature by adding the same code like it was added to warn_slowpath_common() in panic.c. This enables panic_on_warn also for arm64, parisc, powerpc, s390 and sh. Signed-off-by: Heiko Carstens Reported-by: Christian Borntraeger Tested-by: Christian Borntraeger Acked-by: Prarit Bhargava Cc: Catalin Marinas Cc: Will Deacon Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Tested-by: Michael Ellerman (powerpc) Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bug.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib') diff --git a/lib/bug.c b/lib/bug.c index cff145f032a5..6cde380f09de 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -175,6 +175,17 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) pr_warn("WARNING: at %p [verbose debug info unavailable]\n", (void *)bugaddr); + if (panic_on_warn) { + /* + * This thread may hit another WARN() in the panic path. + * Resetting this prevents additional WARN() from + * panicking the system on this thread. Other threads + * are blocked by the panic_mutex in panic(). + */ + panic_on_warn = 0; + panic("panic_on_warn set ...\n"); + } + print_modules(); show_regs(regs); print_oops_end_marker(); -- cgit v1.2.3 From 339e6353046dd4f675304d696a88aefdd727298e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:48 -0700 Subject: radix_tree: tag all internal tree nodes as indirect pointers Set the 'indirect_ptr' bit on all the pointers to internal nodes, not just on the root node. This enables the following patches to support multi-order entries in the radix tree. This patch is split out for ease of bisection. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 24 ++++++++++++++++++------ tools/testing/radix-tree/test.c | 5 +++-- 2 files changed, 21 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 224b369f5a5e..ff91792346f6 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -368,9 +368,10 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) node->count = 1; node->parent = NULL; slot = root->rnode; - if (newheight > 1) { + if (radix_tree_is_indirect_ptr(slot) && newheight > 1) { slot = indirect_to_ptr(slot); slot->parent = node; + slot = ptr_to_indirect(slot); } node->slots[0] = slot; node = ptr_to_indirect(node); @@ -425,17 +426,20 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot->path = height; slot->parent = node; if (node) { - rcu_assign_pointer(node->slots[offset], slot); + rcu_assign_pointer(node->slots[offset], + ptr_to_indirect(slot)); node->count++; slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT; } else - rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); + rcu_assign_pointer(root->rnode, + ptr_to_indirect(slot)); } /* Go a level down */ offset = (index >> shift) & RADIX_TREE_MAP_MASK; node = slot; slot = node->slots[offset]; + slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -533,6 +537,7 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, node = rcu_dereference_raw(*slot); if (node == NULL) return NULL; + node = indirect_to_ptr(node); shift -= RADIX_TREE_MAP_SHIFT; height--; @@ -619,6 +624,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root, tag_set(slot, tag, offset); slot = slot->slots[offset]; BUG_ON(slot == NULL); + slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -658,11 +664,12 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, goto out; shift = height * RADIX_TREE_MAP_SHIFT; - slot = indirect_to_ptr(root->rnode); + slot = root->rnode; while (shift) { if (slot == NULL) goto out; + slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; @@ -738,6 +745,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, if (node == NULL) return 0; + node = indirect_to_ptr(node); offset = (index >> shift) & RADIX_TREE_MAP_MASK; if (!tag_get(node, tag, offset)) @@ -838,6 +846,7 @@ restart: node = rcu_dereference_raw(node->slots[offset]); if (node == NULL) goto restart; + node = indirect_to_ptr(node); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } @@ -939,6 +948,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; node = slot; slot = slot->slots[offset]; + slot = indirect_to_ptr(slot); continue; } @@ -1195,6 +1205,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, slot = rcu_dereference_raw(slot->slots[i]); if (slot == NULL) goto out; + slot = indirect_to_ptr(slot); } /* Bottom level: check items */ @@ -1278,7 +1289,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) */ if (to_free->count != 1) break; - if (!to_free->slots[0]) + slot = to_free->slots[0]; + if (!slot) break; /* @@ -1288,8 +1300,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * (to_free->slots[0]), it will be safe to dereference the new * one (root->rnode) as far as dependent read barriers go. */ - slot = to_free->slots[0]; if (root->height > 1) { + slot = indirect_to_ptr(slot); slot->parent = NULL; slot = ptr_to_indirect(slot); } diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index c9b0bd75b6c6..2bebf34cdc27 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -142,6 +142,8 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, int i; int j; + slot = indirect_to_ptr(slot); + /* Verify consistency at this level */ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) { if (slot->tags[tag][i]) { @@ -184,8 +186,7 @@ void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag) { if (!root->height) return; - verify_node(indirect_to_ptr(root->rnode), - tag, root->height, !!root_tag_get(root, tag)); + verify_node(root->rnode, tag, root->height, !!root_tag_get(root, tag)); } void item_kill_tree(struct radix_tree_root *root) -- cgit v1.2.3 From 0070e28d97e72aeac2a85f538d8a452400dfe1c7 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:51 -0700 Subject: radix_tree: loop based on shift count, not height When we introduce entries that can cover multiple indices, we will need to stop in __radix_tree_create based on the shift, not the height. Split out for ease of bisect. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index ff91792346f6..9bb440f317c9 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -415,10 +415,10 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot = indirect_to_ptr(root->rnode); height = root->height; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; + shift = height * RADIX_TREE_MAP_SHIFT; offset = 0; /* uninitialised var warning */ - while (height > 0) { + while (shift > 0) { if (slot == NULL) { /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) @@ -436,11 +436,11 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } /* Go a level down */ + shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; node = slot; slot = node->slots[offset]; slot = indirect_to_ptr(slot); - shift -= RADIX_TREE_MAP_SHIFT; height--; } -- cgit v1.2.3 From e61452365372570253b2b1de84bab0cdb2e62c64 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:54 -0700 Subject: radix_tree: add support for multi-order entries With huge pages, it is convenient to have the radix tree be able to return an entry that covers multiple indices. Previous attempts to deal with the problem have involved inserting N duplicate entries, which is a waste of memory and leads to problems trying to handle aliased tags, or probing the tree multiple times to find alternative entries which might cover the requested index. This approach inserts one canonical entry into the tree for a given range of indices, and may also insert other entries in order to ensure that lookups find the canonical entry. This solution only tolerates inserting powers of two that are greater than the fanout of the tree. If we wish to expand the radix tree's abilities to support large-ish pages that is less than the fanout at the penultimate level of the tree, then we would need to add one more step in lookup to ensure that any sibling nodes in the final level of the tree are dereferenced and we return the canonical entry that they reference. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 11 ++++- lib/radix-tree.c | 109 ++++++++++++++++++++++++++++++++++----------- mm/filemap.c | 2 +- 3 files changed, 93 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 39598b9cf1d9..b211f145c811 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -271,8 +271,15 @@ static inline void radix_tree_replace_slot(void **pslot, void *item) } int __radix_tree_create(struct radix_tree_root *root, unsigned long index, - struct radix_tree_node **nodep, void ***slotp); -int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); + unsigned order, struct radix_tree_node **nodep, + void ***slotp); +int __radix_tree_insert(struct radix_tree_root *, unsigned long index, + unsigned order, void *); +static inline int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *entry) +{ + return __radix_tree_insert(root, index, 0, entry); +} void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, struct radix_tree_node **nodep, void ***slotp); void *radix_tree_lookup(struct radix_tree_root *, unsigned long); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9bb440f317c9..d907dca302d5 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -333,7 +333,8 @@ static inline unsigned long radix_tree_maxindex(unsigned int height) /* * Extend a radix tree so it can store key @index. */ -static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) +static int radix_tree_extend(struct radix_tree_root *root, + unsigned long index, unsigned order) { struct radix_tree_node *node; struct radix_tree_node *slot; @@ -345,7 +346,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) while (index > radix_tree_maxindex(height)) height++; - if (root->rnode == NULL) { + if ((root->rnode == NULL) && (order == 0)) { root->height = height; goto out; } @@ -386,6 +387,7 @@ out: * __radix_tree_create - create a slot in a radix tree * @root: radix tree root * @index: index key + * @order: index occupies 2^order aligned slots * @nodep: returns node * @slotp: returns slot * @@ -399,26 +401,29 @@ out: * Returns -ENOMEM, or 0 for success. */ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, - struct radix_tree_node **nodep, void ***slotp) + unsigned order, struct radix_tree_node **nodep, + void ***slotp) { struct radix_tree_node *node = NULL, *slot; unsigned int height, shift, offset; int error; + BUG_ON((0 < order) && (order < RADIX_TREE_MAP_SHIFT)); + /* Make sure the tree is high enough. */ if (index > radix_tree_maxindex(root->height)) { - error = radix_tree_extend(root, index); + error = radix_tree_extend(root, index, order); if (error) return error; } - slot = indirect_to_ptr(root->rnode); + slot = root->rnode; height = root->height; shift = height * RADIX_TREE_MAP_SHIFT; offset = 0; /* uninitialised var warning */ - while (shift > 0) { + while (shift > order) { if (slot == NULL) { /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) @@ -433,15 +438,31 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } else rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); - } + } else if (!radix_tree_is_indirect_ptr(slot)) + break; /* Go a level down */ + height--; shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - node = slot; + node = indirect_to_ptr(slot); slot = node->slots[offset]; - slot = indirect_to_ptr(slot); - height--; + } + + /* Insert pointers to the canonical entry */ + if ((shift - order) > 0) { + int i, n = 1 << (shift - order); + offset = offset & ~(n - 1); + slot = ptr_to_indirect(&node->slots[offset]); + for (i = 0; i < n; i++) { + if (node->slots[offset + i]) + return -EEXIST; + } + + for (i = 1; i < n; i++) { + rcu_assign_pointer(node->slots[offset + i], slot); + node->count++; + } } if (nodep) @@ -452,15 +473,16 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } /** - * radix_tree_insert - insert into a radix tree + * __radix_tree_insert - insert into a radix tree * @root: radix tree root * @index: index key + * @order: key covers the 2^order indices around index * @item: item to insert * * Insert an item into the radix tree at position @index. */ -int radix_tree_insert(struct radix_tree_root *root, - unsigned long index, void *item) +int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, + unsigned order, void *item) { struct radix_tree_node *node; void **slot; @@ -468,7 +490,7 @@ int radix_tree_insert(struct radix_tree_root *root, BUG_ON(radix_tree_is_indirect_ptr(item)); - error = __radix_tree_create(root, index, &node, &slot); + error = __radix_tree_create(root, index, order, &node, &slot); if (error) return error; if (*slot != NULL) @@ -486,7 +508,7 @@ int radix_tree_insert(struct radix_tree_root *root, return 0; } -EXPORT_SYMBOL(radix_tree_insert); +EXPORT_SYMBOL(__radix_tree_insert); /** * __radix_tree_lookup - lookup an item in a radix tree @@ -537,6 +559,8 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, node = rcu_dereference_raw(*slot); if (node == NULL) return NULL; + if (!radix_tree_is_indirect_ptr(node)) + break; node = indirect_to_ptr(node); shift -= RADIX_TREE_MAP_SHIFT; @@ -624,6 +648,8 @@ void *radix_tree_tag_set(struct radix_tree_root *root, tag_set(slot, tag, offset); slot = slot->slots[offset]; BUG_ON(slot == NULL); + if (!radix_tree_is_indirect_ptr(slot)) + break; slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; height--; @@ -669,6 +695,8 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, while (shift) { if (slot == NULL) goto out; + if (!radix_tree_is_indirect_ptr(slot)) + break; slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; @@ -753,6 +781,8 @@ int radix_tree_tag_get(struct radix_tree_root *root, if (height == 1) return 1; node = rcu_dereference_raw(node->slots[offset]); + if (!radix_tree_is_indirect_ptr(node)) + return 1; shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -813,6 +843,7 @@ restart: node = rnode; while (1) { + struct radix_tree_node *slot; if ((flags & RADIX_TREE_ITER_TAGGED) ? !test_bit(offset, node->tags[tag]) : !node->slots[offset]) { @@ -843,10 +874,12 @@ restart: if (!shift) break; - node = rcu_dereference_raw(node->slots[offset]); - if (node == NULL) + slot = rcu_dereference_raw(node->slots[offset]); + if (slot == NULL) goto restart; - node = indirect_to_ptr(node); + if (!radix_tree_is_indirect_ptr(slot)) + break; + node = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } @@ -944,16 +977,20 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, if (!tag_get(slot, iftag, offset)) goto next; if (shift) { - /* Go down one level */ - shift -= RADIX_TREE_MAP_SHIFT; node = slot; slot = slot->slots[offset]; - slot = indirect_to_ptr(slot); - continue; + if (radix_tree_is_indirect_ptr(slot)) { + slot = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; + continue; + } else { + slot = node; + node = node->parent; + } } /* tag the leaf */ - tagged++; + tagged += 1 << shift; tag_set(slot, settag, offset); /* walk back up the path tagging interior nodes */ @@ -1201,11 +1238,20 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, goto out; } - shift -= RADIX_TREE_MAP_SHIFT; slot = rcu_dereference_raw(slot->slots[i]); if (slot == NULL) goto out; + if (!radix_tree_is_indirect_ptr(slot)) { + if (slot == item) { + *found_index = index + i; + index = 0; + } else { + index += shift; + } + goto out; + } slot = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; } /* Bottom level: check items */ @@ -1285,7 +1331,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) /* * The candidate node has more than one child, or its child - * is not at the leftmost slot, we cannot shrink. + * is not at the leftmost slot, or it is a multiorder entry, + * we cannot shrink. */ if (to_free->count != 1) break; @@ -1301,6 +1348,9 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * one (root->rnode) as far as dependent read barriers go. */ if (root->height > 1) { + if (!radix_tree_is_indirect_ptr(slot)) + break; + slot = indirect_to_ptr(slot); slot->parent = NULL; slot = ptr_to_indirect(slot); @@ -1399,7 +1449,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node; - unsigned int offset; + unsigned int offset, i; void **slot; void *entry; int tag; @@ -1428,6 +1478,13 @@ void *radix_tree_delete_item(struct radix_tree_root *root, radix_tree_tag_clear(root, index, tag); } + /* Delete any sibling slots pointing to this slot */ + for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { + if (node->slots[offset + i] != ptr_to_indirect(slot)) + break; + node->slots[offset + i] = NULL; + node->count--; + } node->slots[offset] = NULL; node->count--; diff --git a/mm/filemap.c b/mm/filemap.c index 61b441b191ad..084ad0fe73c7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -586,7 +586,7 @@ static int page_cache_tree_insert(struct address_space *mapping, void **slot; int error; - error = __radix_tree_create(&mapping->page_tree, page->index, + error = __radix_tree_create(&mapping->page_tree, page->index, 0, &node, &slot); if (error) return error; -- cgit v1.2.3 From 7cf19af4debc804e408b059d58df7c9c226ca6fb Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:57 -0700 Subject: radix_tree: add radix_tree_dump This is debug code which is #if 0 out. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d907dca302d5..1624c4117961 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -173,6 +173,41 @@ radix_tree_find_next_bit(const unsigned long *addr, return size; } +#if 0 +static void dump_node(void *slot, int height, int offset) +{ + struct radix_tree_node *node; + int i; + + if (!slot) + return; + + if (height == 0) { + pr_debug("radix entry %p offset %d\n", slot, offset); + return; + } + + node = indirect_to_ptr(slot); + pr_debug("radix node: %p offset %d tags %lx %lx %lx path %x count %d parent %p\n", + slot, offset, node->tags[0][0], node->tags[1][0], + node->tags[2][0], node->path, node->count, node->parent); + + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) + dump_node(node->slots[i], height - 1, i); +} + +/* For debug */ +static void radix_tree_dump(struct radix_tree_root *root) +{ + pr_debug("radix root: %p height %d rnode %p tags %x\n", + root, root->height, root->rnode, + root->gfp_mask >> __GFP_BITS_SHIFT); + if (!radix_tree_is_indirect_ptr(root->rnode)) + return; + dump_node(root->rnode, root->height, 0); +} +#endif + /* * This assumes that the caller has performed appropriate preallocation, and * that the caller has pinned this thread of control to the current CPU. -- cgit v1.2.3 From 56b060814e2d87d6646a85a2f4609c73587399ca Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 17 Mar 2016 14:22:14 -0700 Subject: lib/string: introduce match_string() helper Occasionally we have to search for an occurrence of a string in an array of strings. Make a simple helper for that purpose. Signed-off-by: Andy Shevchenko Cc: "David S. Miller" Cc: Bartlomiej Zolnierkiewicz Cc: David Airlie Cc: David Woodhouse Cc: Dmitry Eremin-Solenikov Cc: Greg Kroah-Hartman Cc: Heikki Krogerus Cc: Linus Walleij Cc: Mika Westerberg Cc: Rafael J. Wysocki Cc: Sebastian Reichel Cc: Tejun Heo Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 2 ++ lib/string.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'lib') diff --git a/include/linux/string.h b/include/linux/string.h index 9eebc66d957a..0f235e80d355 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -130,6 +130,8 @@ extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); extern int strtobool(const char *s, bool *res); +int match_string(const char * const *array, size_t n, const char *string); + #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); diff --git a/lib/string.c b/lib/string.c index 0323c0d5629a..e9c9db161d2c 100644 --- a/lib/string.c +++ b/lib/string.c @@ -630,6 +630,32 @@ bool sysfs_streq(const char *s1, const char *s2) } EXPORT_SYMBOL(sysfs_streq); +/** + * match_string - matches given string in an array + * @array: array of strings + * @n: number of strings in the array or -1 for NULL terminated arrays + * @string: string to match with + * + * Return: + * index of a @string in the @array if matches, or %-EINVAL otherwise. + */ +int match_string(const char * const *array, size_t n, const char *string) +{ + int index; + const char *item; + + for (index = 0; index < n; index++) { + item = array[index]; + if (!item) + break; + if (!strcmp(item, string)) + return index; + } + + return -EINVAL; +} +EXPORT_SYMBOL(match_string); + /** * strtobool - convert common user inputs into boolean values * @s: input string -- cgit v1.2.3 From ef951599074ba4fad2d0efa0a977129b41e6d203 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Mar 2016 14:22:50 -0700 Subject: lib: move strtobool() to kstrtobool() Create the kstrtobool_from_user() helper and move strtobool() logic into the new kstrtobool() (matching all the other kstrto* functions). Provides an inline wrapper for existing strtobool() callers. Signed-off-by: Kees Cook Cc: Joe Perches Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: Daniel Borkmann Cc: Amitkumar Karwar Cc: Nishant Sarmukadam Cc: Kalle Valo Cc: Steve French Cc: Michael Ellerman Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 ++ include/linux/string.h | 6 +++++- lib/kstrtox.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/string.c | 29 ----------------------------- 4 files changed, 57 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f31638c6e873..f4fa2b29c38c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -357,6 +357,7 @@ int __must_check kstrtou16(const char *s, unsigned int base, u16 *res); int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); +int __must_check kstrtobool(const char *s, bool *res); int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res); int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res); @@ -368,6 +369,7 @@ int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigne int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res); int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res); int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res); +int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res); static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res) { diff --git a/include/linux/string.h b/include/linux/string.h index 0f235e80d355..d3993a79a325 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -128,7 +128,11 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); -extern int strtobool(const char *s, bool *res); +extern int kstrtobool(const char *s, bool *res); +static inline int strtobool(const char *s, bool *res) +{ + return kstrtobool(s, res); +} int match_string(const char * const *array, size_t n, const char *string); diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 94be244e8441..e8ba4a013e82 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -321,6 +321,56 @@ int kstrtos8(const char *s, unsigned int base, s8 *res) } EXPORT_SYMBOL(kstrtos8); +/** + * kstrtobool - convert common user inputs into boolean values + * @s: input string + * @res: result + * + * This routine returns 0 iff the first character is one of 'Yy1Nn0'. + * Otherwise it will return -EINVAL. Value pointed to by res is + * updated upon finding a match. + */ +int kstrtobool(const char *s, bool *res) +{ + if (!s) + return -EINVAL; + + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + return 0; + case 'n': + case 'N': + case '0': + *res = false; + return 0; + default: + break; + } + + return -EINVAL; +} +EXPORT_SYMBOL(kstrtobool); + +/* + * Since "base" would be a nonsense argument, this open-codes the + * _from_user helper instead of using the helper macro below. + */ +int kstrtobool_from_user(const char __user *s, size_t count, bool *res) +{ + /* Longest string needed to differentiate, newline, terminator */ + char buf[4]; + + count = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, s, count)) + return -EFAULT; + buf[count] = '\0'; + return kstrtobool(buf, res); +} +EXPORT_SYMBOL(kstrtobool_from_user); + #define kstrto_from_user(f, g, type) \ int f(const char __user *s, size_t count, unsigned int base, type *res) \ { \ diff --git a/lib/string.c b/lib/string.c index e9c9db161d2c..ed83562a53ae 100644 --- a/lib/string.c +++ b/lib/string.c @@ -656,35 +656,6 @@ int match_string(const char * const *array, size_t n, const char *string) } EXPORT_SYMBOL(match_string); -/** - * strtobool - convert common user inputs into boolean values - * @s: input string - * @res: result - * - * This routine returns 0 iff the first character is one of 'Yy1Nn0'. - * Otherwise it will return -EINVAL. Value pointed to by res is - * updated upon finding a match. - */ -int strtobool(const char *s, bool *res) -{ - switch (s[0]) { - case 'y': - case 'Y': - case '1': - *res = true; - break; - case 'n': - case 'N': - case '0': - *res = false; - break; - default: - return -EINVAL; - } - return 0; -} -EXPORT_SYMBOL(strtobool); - #ifndef __HAVE_ARCH_MEMSET /** * memset - Fill a region of memory with the given value -- cgit v1.2.3 From a81a5a17d44b26521fb1199f8ccf27f4af337a67 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Mar 2016 14:22:57 -0700 Subject: lib: add "on"/"off" support to kstrtobool Add support for "on" and "off" when converting to boolean. Signed-off-by: Kees Cook Cc: Amitkumar Karwar Cc: Andy Shevchenko Cc: Daniel Borkmann Cc: Heiko Carstens Cc: Joe Perches Cc: Kalle Valo Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Nishant Sarmukadam Cc: Rasmus Villemoes Cc: Steve French Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kstrtox.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/kstrtox.c b/lib/kstrtox.c index e8ba4a013e82..d8a5cf66c316 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -326,9 +326,9 @@ EXPORT_SYMBOL(kstrtos8); * @s: input string * @res: result * - * This routine returns 0 iff the first character is one of 'Yy1Nn0'. - * Otherwise it will return -EINVAL. Value pointed to by res is - * updated upon finding a match. + * This routine returns 0 iff the first character is one of 'Yy1Nn0', or + * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value + * pointed to by res is updated upon finding a match. */ int kstrtobool(const char *s, bool *res) { @@ -346,6 +346,20 @@ int kstrtobool(const char *s, bool *res) case '0': *res = false; return 0; + case 'o': + case 'O': + switch (s[1]) { + case 'n': + case 'N': + *res = true; + return 0; + case 'f': + case 'F': + *res = false; + return 0; + default: + break; + } default: break; } -- cgit v1.2.3 From 2553b67a1fbe7bf202e4e8070ab0b00d3d3a06a2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 17 Mar 2016 14:23:04 -0700 Subject: lib/bug.c: use common WARN helper The traceoff_on_warning option doesn't have any effect on s390, powerpc, arm64, parisc, and sh because there are two different types of WARN implementations: 1) The above mentioned architectures treat WARN() as a special case of a BUG() exception. They handle warnings in report_bug() in lib/bug.c. 2) All other architectures just call warn_slowpath_*() directly. Their warnings are handled in warn_slowpath_common() in kernel/panic.c. Support traceoff_on_warning on all architectures and prevent any future divergence by using a single common function to emit the warning. Also remove the '()' from '%pS()', because the parentheses look funky: [ 45.607629] WARNING: at /root/warn_mod/warn_mod.c:17 .init_dummy+0x20/0x40 [warn_mod]() Reported-by: Chunyu Hu Signed-off-by: Josh Poimboeuf Acked-by: Heiko Carstens Tested-by: Prarit Bhargava Acked-by: Prarit Bhargava Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/bug.h | 6 ++++++ kernel/panic.c | 41 ++++++++++++++++++++++++++--------------- lib/bug.c | 26 ++------------------------ 3 files changed, 34 insertions(+), 39 deletions(-) (limited to 'lib') diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index fdf6fa078422..f90588abbfd4 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -81,6 +81,12 @@ extern void warn_slowpath_null(const char *file, const int line); do { printk(arg); __WARN_TAINT(taint); } while (0) #endif +/* used internally by panic.c */ +struct warn_args; + +void __warn(const char *file, int line, void *caller, unsigned taint, + struct pt_regs *regs, struct warn_args *args); + #ifndef WARN_ON #define WARN_ON(condition) ({ \ int __ret_warn_on = !!(condition); \ diff --git a/kernel/panic.c b/kernel/panic.c index d96469de72dc..fa400852bf6c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -24,6 +24,7 @@ #include #include #include +#include #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 @@ -449,20 +450,25 @@ void oops_exit(void) kmsg_dump(KMSG_DUMP_OOPS); } -#ifdef WANT_WARN_ON_SLOWPATH -struct slowpath_args { +struct warn_args { const char *fmt; va_list args; }; -static void warn_slowpath_common(const char *file, int line, void *caller, - unsigned taint, struct slowpath_args *args) +void __warn(const char *file, int line, void *caller, unsigned taint, + struct pt_regs *regs, struct warn_args *args) { disable_trace_on_warning(); pr_warn("------------[ cut here ]------------\n"); - pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS()\n", - raw_smp_processor_id(), current->pid, file, line, caller); + + if (file) + pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", + raw_smp_processor_id(), current->pid, file, line, + caller); + else + pr_warn("WARNING: CPU: %d PID: %d at %pS\n", + raw_smp_processor_id(), current->pid, caller); if (args) vprintk(args->fmt, args->args); @@ -479,20 +485,27 @@ static void warn_slowpath_common(const char *file, int line, void *caller, } print_modules(); - dump_stack(); + + if (regs) + show_regs(regs); + else + dump_stack(); + print_oops_end_marker(); + /* Just a warning, don't kill lockdep. */ add_taint(taint, LOCKDEP_STILL_OK); } +#ifdef WANT_WARN_ON_SLOWPATH void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) { - struct slowpath_args args; + struct warn_args args; args.fmt = fmt; va_start(args.args, fmt); - warn_slowpath_common(file, line, __builtin_return_address(0), - TAINT_WARN, &args); + __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, + &args); va_end(args.args); } EXPORT_SYMBOL(warn_slowpath_fmt); @@ -500,20 +513,18 @@ EXPORT_SYMBOL(warn_slowpath_fmt); void warn_slowpath_fmt_taint(const char *file, int line, unsigned taint, const char *fmt, ...) { - struct slowpath_args args; + struct warn_args args; args.fmt = fmt; va_start(args.args, fmt); - warn_slowpath_common(file, line, __builtin_return_address(0), - taint, &args); + __warn(file, line, __builtin_return_address(0), taint, NULL, &args); va_end(args.args); } EXPORT_SYMBOL(warn_slowpath_fmt_taint); void warn_slowpath_null(const char *file, int line) { - warn_slowpath_common(file, line, __builtin_return_address(0), - TAINT_WARN, NULL); + __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, NULL); } EXPORT_SYMBOL(warn_slowpath_null); #endif diff --git a/lib/bug.c b/lib/bug.c index 6cde380f09de..bc3656e944d2 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -167,30 +167,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) if (warning) { /* this is a WARN_ON rather than BUG/BUG_ON */ - pr_warn("------------[ cut here ]------------\n"); - - if (file) - pr_warn("WARNING: at %s:%u\n", file, line); - else - pr_warn("WARNING: at %p [verbose debug info unavailable]\n", - (void *)bugaddr); - - if (panic_on_warn) { - /* - * This thread may hit another WARN() in the panic path. - * Resetting this prevents additional WARN() from - * panicking the system on this thread. Other threads - * are blocked by the panic_mutex in panic(). - */ - panic_on_warn = 0; - panic("panic_on_warn set ...\n"); - } - - print_modules(); - show_regs(regs); - print_oops_end_marker(); - /* Just a warning, don't kill lockdep. */ - add_taint(BUG_GET_TAINT(bug), LOCKDEP_STILL_OK); + __warn(file, line, (void *)bugaddr, BUG_GET_TAINT(bug), regs, + NULL); return BUG_TRAP_TYPE_WARN; } -- cgit v1.2.3 From f9310b2f9a19b7f16c7b1c1558f8b649b9b933c1 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Thu, 17 Mar 2016 14:23:07 -0700 Subject: sscanf: implement basic character sets Implement basic character sets for the '%[' conversion specifier. The '%[' conversion specifier matches a nonempty sequence of characters from the specified set of accepted (or with '^', rejected) characters between the brackets. The substring matched is to be made up of characters in (or not in) the set. This is useful for matching substrings that are delimited by something other than spaces. This implementation differs from its glibc counterpart in the following ways: (1) No support for character ranges (e.g., 'a-z' or '0-9') (2) The hyphen '-' is not a special character (3) The closing bracket ']' cannot be matched (4) No support (yet) for discarding matching input ('%*[') The bitmap code is largely based upon sample code which was provided by Rasmus. The motivation for adding character set support to sscanf originally stemmed from the kernel livepatching project. An ongoing patchset utilizes new livepatch Elf symbol and section names to store important metadata livepatch needs to properly apply its patches. Such metadata is stored in these section and symbol names as substrings delimited by periods '.' and commas ','. For example, a livepatch symbol name might look like this: .klp.sym.vmlinux.printk,0 However, sscanf currently can only extract "substrings" delimited by whitespace using the "%s" specifier. Thus for the above symbol name, one cannot not use sscanf() to extract substrings "vmlinux" or "printk", for example. A number of discussions on the livepatch mailing list dealing with string parsing code for extracting these '.' and ',' delimited substrings eventually led to the conclusion that such code would be completely unnecessary if the kernel sscanf() supported character sets. Thus only a single sscanf() call would be necessary to extract these substrings. In addition, such an addition to sscanf() could benefit other areas of the kernel that might have a similar need in the future. [akpm@linux-foundation.org: 80-col tweaks] Signed-off-by: Jessica Yu Signed-off-by: Rasmus Villemoes Cc: Andy Shevchenko Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 525c8e19bda2..ccb664b54280 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2640,8 +2640,12 @@ int vsscanf(const char *buf, const char *fmt, va_list args) if (*fmt == '*') { if (!*str) break; - while (!isspace(*fmt) && *fmt != '%' && *fmt) + while (!isspace(*fmt) && *fmt != '%' && *fmt) { + /* '%*[' not yet supported, invalid format */ + if (*fmt == '[') + return num; fmt++; + } while (!isspace(*str) && *str) str++; continue; @@ -2714,6 +2718,59 @@ int vsscanf(const char *buf, const char *fmt, va_list args) num++; } continue; + /* + * Warning: This implementation of the '[' conversion specifier + * deviates from its glibc counterpart in the following ways: + * (1) It does NOT support ranges i.e. '-' is NOT a special + * character + * (2) It cannot match the closing bracket ']' itself + * (3) A field width is required + * (4) '%*[' (discard matching input) is currently not supported + * + * Example usage: + * ret = sscanf("00:0a:95","%2[^:]:%2[^:]:%2[^:]", + * buf1, buf2, buf3); + * if (ret < 3) + * // etc.. + */ + case '[': + { + char *s = (char *)va_arg(args, char *); + DECLARE_BITMAP(set, 256) = {0}; + unsigned int len = 0; + bool negate = (*fmt == '^'); + + /* field width is required */ + if (field_width == -1) + return num; + + if (negate) + ++fmt; + + for ( ; *fmt && *fmt != ']'; ++fmt, ++len) + set_bit((u8)*fmt, set); + + /* no ']' or no character set found */ + if (!*fmt || !len) + return num; + ++fmt; + + if (negate) { + bitmap_complement(set, set, 256); + /* exclude null '\0' byte */ + clear_bit(0, set); + } + + /* match must be non-empty */ + if (!test_bit((u8)*str, set)) + return num; + + while (test_bit((u8)*str, set) && field_width--) + *s++ = *str++; + *s = '\0'; + ++num; + } + continue; case 'o': base = 8; break; -- cgit v1.2.3