summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorMikulas Patocka <mpatocka@redhat.com>2021-03-10 11:18:03 +0100
committerMike Snitzer <snitzer@redhat.com>2021-03-26 19:53:41 +0100
commitb82096afc8afdc55408efb54ede2ec55c1f3f8c8 (patch)
tree2e329cff5c720e94659027ad05b4f8336b3a6866 /drivers/md
parentdm verity: allow only one error handling mode (diff)
downloadlinux-b82096afc8afdc55408efb54ede2ec55c1f3f8c8.tar.xz
linux-b82096afc8afdc55408efb54ede2ec55c1f3f8c8.zip
dm ioctl: replace device hash with red-black tree
For high numbers of DM devices the 64-entry hash table has non-trivial overhead. Fix this by replacing the hash table with a red-black tree. Reported-by: Zdenek Kabelac <zkabelac@redhat.com> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-ioctl.c254
1 files changed, 142 insertions, 112 deletions
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 1ca65b434f1f..272de8772d52 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/slab.h>
+#include <linux/rbtree.h>
#include <linux/dm-ioctl.h>
#include <linux/hdreg.h>
#include <linux/compat.h>
@@ -36,8 +37,10 @@ struct dm_file {
* name or uuid.
*---------------------------------------------------------------*/
struct hash_cell {
- struct list_head name_list;
- struct list_head uuid_list;
+ struct rb_node name_node;
+ struct rb_node uuid_node;
+ bool name_set;
+ bool uuid_set;
char *name;
char *uuid;
@@ -53,10 +56,8 @@ struct vers_iter {
};
-#define NUM_BUCKETS 64
-#define MASK_BUCKETS (NUM_BUCKETS - 1)
-static struct list_head _name_buckets[NUM_BUCKETS];
-static struct list_head _uuid_buckets[NUM_BUCKETS];
+static struct rb_root name_rb_tree = RB_ROOT;
+static struct rb_root uuid_rb_tree = RB_ROOT;
static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool only_deferred);
@@ -70,73 +71,110 @@ static DECLARE_RWSEM(_hash_lock);
*/
static DEFINE_MUTEX(dm_hash_cells_mutex);
-static void init_buckets(struct list_head *buckets)
-{
- unsigned int i;
-
- for (i = 0; i < NUM_BUCKETS; i++)
- INIT_LIST_HEAD(buckets + i);
-}
-
-static int dm_hash_init(void)
-{
- init_buckets(_name_buckets);
- init_buckets(_uuid_buckets);
- return 0;
-}
-
static void dm_hash_exit(void)
{
dm_hash_remove_all(false, false, false);
}
/*-----------------------------------------------------------------
- * Hash function:
- * We're not really concerned with the str hash function being
- * fast since it's only used by the ioctl interface.
- *---------------------------------------------------------------*/
-static unsigned int hash_str(const char *str)
-{
- const unsigned int hash_mult = 2654435387U;
- unsigned int h = 0;
-
- while (*str)
- h = (h + (unsigned int) *str++) * hash_mult;
-
- return h & MASK_BUCKETS;
-}
-
-/*-----------------------------------------------------------------
* Code for looking up a device by name
*---------------------------------------------------------------*/
static struct hash_cell *__get_name_cell(const char *str)
{
- struct hash_cell *hc;
- unsigned int h = hash_str(str);
+ struct rb_node *n = name_rb_tree.rb_node;
- list_for_each_entry (hc, _name_buckets + h, name_list)
- if (!strcmp(hc->name, str)) {
+ while (n) {
+ struct hash_cell *hc = container_of(n, struct hash_cell, name_node);
+ int c = strcmp(hc->name, str);
+ if (!c) {
dm_get(hc->md);
return hc;
}
+ n = c >= 0 ? n->rb_left : n->rb_right;
+ }
return NULL;
}
static struct hash_cell *__get_uuid_cell(const char *str)
{
- struct hash_cell *hc;
- unsigned int h = hash_str(str);
+ struct rb_node *n = uuid_rb_tree.rb_node;
- list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
- if (!strcmp(hc->uuid, str)) {
+ while (n) {
+ struct hash_cell *hc = container_of(n, struct hash_cell, uuid_node);
+ int c = strcmp(hc->uuid, str);
+ if (!c) {
dm_get(hc->md);
return hc;
}
+ n = c >= 0 ? n->rb_left : n->rb_right;
+ }
return NULL;
}
+static void __unlink_name(struct hash_cell *hc)
+{
+ if (hc->name_set) {
+ hc->name_set = false;
+ rb_erase(&hc->name_node, &name_rb_tree);
+ }
+}
+
+static void __unlink_uuid(struct hash_cell *hc)
+{
+ if (hc->uuid_set) {
+ hc->uuid_set = false;
+ rb_erase(&hc->uuid_node, &uuid_rb_tree);
+ }
+}
+
+static void __link_name(struct hash_cell *new_hc)
+{
+ struct rb_node **n, *parent;
+
+ __unlink_name(new_hc);
+
+ new_hc->name_set = true;
+
+ n = &name_rb_tree.rb_node;
+ parent = NULL;
+
+ while (*n) {
+ struct hash_cell *hc = container_of(*n, struct hash_cell, name_node);
+ int c = strcmp(hc->name, new_hc->name);
+ BUG_ON(!c);
+ parent = *n;
+ n = c >= 0 ? &hc->name_node.rb_left : &hc->name_node.rb_right;
+ }
+
+ rb_link_node(&new_hc->name_node, parent, n);
+ rb_insert_color(&new_hc->name_node, &name_rb_tree);
+}
+
+static void __link_uuid(struct hash_cell *new_hc)
+{
+ struct rb_node **n, *parent;
+
+ __unlink_uuid(new_hc);
+
+ new_hc->uuid_set = true;
+
+ n = &uuid_rb_tree.rb_node;
+ parent = NULL;
+
+ while (*n) {
+ struct hash_cell *hc = container_of(*n, struct hash_cell, uuid_node);
+ int c = strcmp(hc->uuid, new_hc->uuid);
+ BUG_ON(!c);
+ parent = *n;
+ n = c > 0 ? &hc->uuid_node.rb_left : &hc->uuid_node.rb_right;
+ }
+
+ rb_link_node(&new_hc->uuid_node, parent, n);
+ rb_insert_color(&new_hc->uuid_node, &uuid_rb_tree);
+}
+
static struct hash_cell *__get_dev_cell(uint64_t dev)
{
struct mapped_device *md;
@@ -185,8 +223,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid,
}
}
- INIT_LIST_HEAD(&hc->name_list);
- INIT_LIST_HEAD(&hc->uuid_list);
+ hc->name_set = hc->uuid_set = false;
hc->md = md;
hc->new_map = NULL;
return hc;
@@ -226,16 +263,16 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi
goto bad;
}
- list_add(&cell->name_list, _name_buckets + hash_str(name));
+ __link_name(cell);
if (uuid) {
hc = __get_uuid_cell(uuid);
if (hc) {
- list_del(&cell->name_list);
+ __unlink_name(cell);
dm_put(hc->md);
goto bad;
}
- list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
+ __link_uuid(cell);
}
dm_get(md);
mutex_lock(&dm_hash_cells_mutex);
@@ -256,9 +293,9 @@ static struct dm_table *__hash_remove(struct hash_cell *hc)
struct dm_table *table;
int srcu_idx;
- /* remove from the dev hash */
- list_del(&hc->uuid_list);
- list_del(&hc->name_list);
+ /* remove from the dev trees */
+ __unlink_name(hc);
+ __unlink_uuid(hc);
mutex_lock(&dm_hash_cells_mutex);
dm_set_mdptr(hc->md, NULL);
mutex_unlock(&dm_hash_cells_mutex);
@@ -279,7 +316,8 @@ static struct dm_table *__hash_remove(struct hash_cell *hc)
static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool only_deferred)
{
- int i, dev_skipped;
+ int dev_skipped;
+ struct rb_node *n;
struct hash_cell *hc;
struct mapped_device *md;
struct dm_table *t;
@@ -289,40 +327,39 @@ retry:
down_write(&_hash_lock);
- for (i = 0; i < NUM_BUCKETS; i++) {
- list_for_each_entry(hc, _name_buckets + i, name_list) {
- md = hc->md;
- dm_get(md);
+ for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) {
+ hc = container_of(n, struct hash_cell, name_node);
+ md = hc->md;
+ dm_get(md);
- if (keep_open_devices &&
- dm_lock_for_deletion(md, mark_deferred, only_deferred)) {
- dm_put(md);
- dev_skipped++;
- continue;
- }
+ if (keep_open_devices &&
+ dm_lock_for_deletion(md, mark_deferred, only_deferred)) {
+ dm_put(md);
+ dev_skipped++;
+ continue;
+ }
- t = __hash_remove(hc);
+ t = __hash_remove(hc);
- up_write(&_hash_lock);
+ up_write(&_hash_lock);
- if (t) {
- dm_sync_table(md);
- dm_table_destroy(t);
- }
- dm_put(md);
- if (likely(keep_open_devices))
- dm_destroy(md);
- else
- dm_destroy_immediate(md);
-
- /*
- * Some mapped devices may be using other mapped
- * devices, so repeat until we make no further
- * progress. If a new mapped device is created
- * here it will also get removed.
- */
- goto retry;
+ if (t) {
+ dm_sync_table(md);
+ dm_table_destroy(t);
}
+ dm_put(md);
+ if (likely(keep_open_devices))
+ dm_destroy(md);
+ else
+ dm_destroy_immediate(md);
+
+ /*
+ * Some mapped devices may be using other mapped
+ * devices, so repeat until we make no further
+ * progress. If a new mapped device is created
+ * here it will also get removed.
+ */
+ goto retry;
}
up_write(&_hash_lock);
@@ -340,7 +377,7 @@ static void __set_cell_uuid(struct hash_cell *hc, char *new_uuid)
hc->uuid = new_uuid;
mutex_unlock(&dm_hash_cells_mutex);
- list_add(&hc->uuid_list, _uuid_buckets + hash_str(new_uuid));
+ __link_uuid(hc);
}
/*
@@ -354,14 +391,14 @@ static char *__change_cell_name(struct hash_cell *hc, char *new_name)
/*
* Rename and move the name cell.
*/
- list_del(&hc->name_list);
+ __unlink_name(hc);
old_name = hc->name;
mutex_lock(&dm_hash_cells_mutex);
hc->name = new_name;
mutex_unlock(&dm_hash_cells_mutex);
- list_add(&hc->name_list, _name_buckets + hash_str(new_name));
+ __link_name(hc);
return old_name;
}
@@ -505,7 +542,7 @@ static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,
static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
- unsigned int i;
+ struct rb_node *n;
struct hash_cell *hc;
size_t len, needed = 0;
struct gendisk *disk;
@@ -518,11 +555,10 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_
* Loop through all the devices working out how much
* space we need.
*/
- for (i = 0; i < NUM_BUCKETS; i++) {
- list_for_each_entry (hc, _name_buckets + i, name_list) {
- needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1);
- needed += align_val(sizeof(uint32_t));
- }
+ for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) {
+ hc = container_of(n, struct hash_cell, name_node);
+ needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1);
+ needed += align_val(sizeof(uint32_t));
}
/*
@@ -540,21 +576,20 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_
/*
* Now loop through filling out the names.
*/
- for (i = 0; i < NUM_BUCKETS; i++) {
- list_for_each_entry (hc, _name_buckets + i, name_list) {
- if (old_nl)
- old_nl->next = (uint32_t) ((void *) nl -
- (void *) old_nl);
- disk = dm_disk(hc->md);
- nl->dev = huge_encode_dev(disk_devt(disk));
- nl->next = 0;
- strcpy(nl->name, hc->name);
-
- old_nl = nl;
- event_nr = align_ptr(nl->name + strlen(hc->name) + 1);
- *event_nr = dm_get_event_nr(hc->md);
- nl = align_ptr(event_nr + 1);
- }
+ for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) {
+ hc = container_of(n, struct hash_cell, name_node);
+ if (old_nl)
+ old_nl->next = (uint32_t) ((void *) nl -
+ (void *) old_nl);
+ disk = dm_disk(hc->md);
+ nl->dev = huge_encode_dev(disk_devt(disk));
+ nl->next = 0;
+ strcpy(nl->name, hc->name);
+
+ old_nl = nl;
+ event_nr = align_ptr(nl->name + strlen(hc->name) + 1);
+ *event_nr = dm_get_event_nr(hc->md);
+ nl = align_ptr(event_nr + 1);
}
/*
* If mismatch happens, security may be compromised due to buffer
@@ -1991,14 +2026,9 @@ int __init dm_interface_init(void)
{
int r;
- r = dm_hash_init();
- if (r)
- return r;
-
r = misc_register(&_dm_misc);
if (r) {
DMERR("misc_register failed for control device");
- dm_hash_exit();
return r;
}