diff options
author | David Lamparter <equinox@opensourcerouting.org> | 2015-04-13 10:21:36 +0200 |
---|---|---|
committer | Donald Sharp <sharpd@cumulusnetworks.com> | 2015-11-03 14:54:14 +0100 |
commit | 7e111b6b0de44a9d351cdfeda4c674224a65ec3b (patch) | |
tree | 849687e00fff77c217f49ec3b0bd9f641c3207eb /lib | |
parent | lib: straighten out ORF prefix list support (diff) | |
download | frr-7e111b6b0de44a9d351cdfeda4c674224a65ec3b.tar.xz frr-7e111b6b0de44a9d351cdfeda4c674224a65ec3b.zip |
lib: use trie structure for prefix list matching
Prefix lists were implemented with a simple linear list that is scanned
sequentially. This is, of course, extremely inefficient as it scales by
O(n). This patch adds a trie-ish data structure that allows quickly
descending based on the prefix.
Note that the trie structure used here is designed for real-world use,
hence it uses a relatively crude fixed-size bytewise table instead of
some fancy balancing scheme. It is quite cacheline efficient.
Using real-world routeserver prefix lists, matching against a fulltable
dump:
entries before after factor
9103 63.8s .0124s 5142x
772 4.52s .0101s 445.3x
86 .445s .0098s 45.51x
7 .0379s .0099s 3.834x
2 .0136s .0095s 1.440x
1 .0084s .0095s .879x
This buys CPU with memory. Memory usage on an IXP setup with 100k
prefix list entries is an additional 4 MB on top of the 9.5 MB that it
was before.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/memtypes.c | 1 | ||||
-rw-r--r-- | lib/plist.c | 220 |
2 files changed, 206 insertions, 15 deletions
diff --git a/lib/memtypes.c b/lib/memtypes.c index 828820f30..c63178b6d 100644 --- a/lib/memtypes.c +++ b/lib/memtypes.c @@ -50,6 +50,7 @@ struct memory_list memory_list_lib[] = { MTYPE_PREFIX_LIST, "Prefix List" }, { MTYPE_PREFIX_LIST_ENTRY, "Prefix List Entry" }, { MTYPE_PREFIX_LIST_STR, "Prefix List Str" }, + { MTYPE_PREFIX_LIST_TRIE, "Prefix List Trie Table" }, { MTYPE_ROUTE_MAP, "Route map" }, { MTYPE_ROUTE_MAP_NAME, "Route map name" }, { MTYPE_ROUTE_MAP_INDEX, "Route map index" }, diff --git a/lib/plist.c b/lib/plist.c index 7a356dffb..2fdb0b33e 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -34,6 +34,26 @@ #include "plist_int.h" +/* not currently changeable, code assumes bytes further down */ +#define PLC_BITS 8 +#define PLC_LEN (1 << PLC_BITS) +#define PLC_MAXLEVELV4 2 /* /24 for IPv4 */ +#define PLC_MAXLEVELV6 4 /* /48 for IPv6 */ +#define PLC_MAXLEVEL 4 /* max(v4,v6) */ + +struct pltrie_entry { + union { + struct pltrie_table *next_table; + struct prefix_list_entry *final_chain; + }; + + struct prefix_list_entry *up_chain; +}; + +struct pltrie_table { + struct pltrie_entry entries[PLC_LEN]; +}; + /* List of struct prefix_list. */ struct prefix_list_list { @@ -61,6 +81,9 @@ struct prefix_master /* Hook function which is executed when prefix_list is deleted. */ void (*delete_hook) (struct prefix_list *); + + /* number of bytes that have a trie level */ + size_t trie_depth; }; /* Static structure of IPv4 prefix_list's master. */ @@ -71,6 +94,8 @@ static struct prefix_master prefix_master_ipv4 = 1, NULL, NULL, + NULL, + PLC_MAXLEVELV4, }; #ifdef HAVE_IPV6 @@ -82,27 +107,33 @@ static struct prefix_master prefix_master_ipv6 = 1, NULL, NULL, + NULL, + PLC_MAXLEVELV6, }; #endif /* HAVE_IPV6*/ /* Static structure of BGP ORF prefix_list's master. */ -static struct prefix_master prefix_master_orf_v4 = -{ +static struct prefix_master prefix_master_orf_v4 = +{ {NULL, NULL}, {NULL, NULL}, 1, NULL, NULL, + NULL, + PLC_MAXLEVELV4, }; /* Static structure of BGP ORF prefix_list's master. */ -static struct prefix_master prefix_master_orf_v6 = -{ +static struct prefix_master prefix_master_orf_v6 = +{ {NULL, NULL}, {NULL, NULL}, 1, NULL, NULL, + NULL, + PLC_MAXLEVELV6, }; static struct prefix_master * @@ -207,6 +238,7 @@ prefix_list_insert (afi_t afi, int orf, const char *name) plist = prefix_list_new (); plist->name = XSTRDUP (MTYPE_PREFIX_LIST_STR, name); plist->master = master; + plist->trie = XCALLOC (MTYPE_PREFIX_LIST_TRIE, sizeof (struct pltrie_table)); /* If name is made by all digit character. We treat it as number. */ @@ -340,6 +372,8 @@ prefix_list_delete (struct prefix_list *plist) if (plist->name) XFREE (MTYPE_PREFIX_LIST_STR, plist->name); + XFREE (MTYPE_PREFIX_LIST_TRIE, plist->trie); + prefix_list_free (plist); } @@ -441,10 +475,87 @@ prefix_list_entry_lookup (struct prefix_list *plist, struct prefix *prefix, } static void +trie_walk_affected (size_t validbits, struct pltrie_table *table, uint8_t byte, + struct prefix_list_entry *object, + void (*fn)(struct prefix_list_entry *object, + struct prefix_list_entry **updptr)) +{ + uint8_t mask; + uint16_t bwalk; + + if (validbits > PLC_BITS) + { + fn (object, &table->entries[byte].final_chain); + return; + } + + mask = (1 << (8 - validbits)) - 1; + for (bwalk = byte & ~mask; bwalk <= byte + mask; bwalk++) + { + fn (object, &table->entries[bwalk].up_chain); + } +} + +static void trie_uninstall_fn (struct prefix_list_entry *object, + struct prefix_list_entry **updptr) +{ + for (; *updptr; updptr = &(*updptr)->next_best) + if (*updptr == object) + { + *updptr = object->next_best; + break; + } +} + +static int +trie_table_empty (struct pltrie_table *table) +{ + size_t i; + for (i = 0; i < PLC_LEN; i++) + if (table->entries[i].next_table || table->entries[i].final_chain) + return 0; + return 1; +} + +static void +prefix_list_trie_del (struct prefix_list *plist, + struct prefix_list_entry *pentry) +{ + size_t depth, maxdepth = plist->master->trie_depth; + uint8_t *bytes = &pentry->prefix.u.prefix; + size_t validbits = pentry->prefix.prefixlen; + struct pltrie_table *table, **tables[PLC_MAXLEVEL]; + + table = plist->trie; + for (depth = 0; validbits > PLC_BITS && depth < maxdepth - 1; depth++) + { + uint8_t byte = bytes[depth]; + assert (table->entries[byte].next_table); + + tables[depth + 1] = &table->entries[byte].next_table; + table = table->entries[byte].next_table; + + validbits -= PLC_BITS; + } + + trie_walk_affected (validbits, table, bytes[depth], pentry, trie_uninstall_fn); + + for (; depth > 0; depth--) + if (trie_table_empty (*tables[depth])) + { + XFREE (MTYPE_PREFIX_LIST_TRIE, *tables[depth]); + *tables[depth] = NULL; + } +} + + +static void prefix_list_entry_delete (struct prefix_list *plist, struct prefix_list_entry *pentry, int update_list) { + prefix_list_trie_del (plist, pentry); + if (plist == NULL || pentry == NULL) return; if (pentry->prev) @@ -473,6 +584,52 @@ prefix_list_entry_delete (struct prefix_list *plist, } } +static void trie_install_fn (struct prefix_list_entry *object, + struct prefix_list_entry **updptr) +{ + while (*updptr) + { + if (*updptr == object) + return; + if ((*updptr)->prefix.prefixlen < object->prefix.prefixlen) + break; + if ((*updptr)->seq > object->seq) + break; + updptr = &(*updptr)->next_best; + } + + if (!object->next_best) + object->next_best = *updptr; + else + assert (object->next_best == *updptr || !*updptr); + + *updptr = object; +} + +static void +prefix_list_trie_add (struct prefix_list *plist, + struct prefix_list_entry *pentry) +{ + size_t depth = plist->master->trie_depth; + uint8_t *bytes = &pentry->prefix.u.prefix; + size_t validbits = pentry->prefix.prefixlen; + struct pltrie_table *table; + + table = plist->trie; + while (validbits > PLC_BITS && depth > 1) + { + if (!table->entries[*bytes].next_table) + table->entries[*bytes].next_table = XCALLOC (MTYPE_PREFIX_LIST_TRIE, + sizeof(struct pltrie_table)); + table = table->entries[*bytes].next_table; + bytes++; + depth--; + validbits -= PLC_BITS; + } + + trie_walk_affected (validbits, table, *bytes, pentry, trie_install_fn); +} + static void prefix_list_entry_add (struct prefix_list *plist, struct prefix_list_entry *pentry) @@ -518,6 +675,8 @@ prefix_list_entry_add (struct prefix_list *plist, plist->tail = pentry; } + prefix_list_trie_add (plist, pentry); + /* Increment count. */ plist->count++; @@ -575,10 +734,13 @@ prefix_list_entry_match (struct prefix_list_entry *pentry, struct prefix *p) enum prefix_list_type prefix_list_apply (struct prefix_list *plist, void *object) { - struct prefix_list_entry *pentry; - struct prefix *p; + struct prefix_list_entry *pentry, *pbest = NULL; - p = (struct prefix *) object; + struct prefix *p = (struct prefix *) object; + uint8_t *byte = &p->u.prefix; + size_t depth = plist->master->trie_depth; + size_t validbits = p->prefixlen; + struct pltrie_table *table; if (plist == NULL) return PREFIX_DENY; @@ -586,17 +748,45 @@ prefix_list_apply (struct prefix_list *plist, void *object) if (plist->count == 0) return PREFIX_PERMIT; - for (pentry = plist->head; pentry; pentry = pentry->next) + table = plist->trie; + while (1) { - pentry->refcnt++; - if (prefix_list_entry_match (pentry, p)) - { - pentry->hitcnt++; - return pentry->type; - } + for (pentry = table->entries[*byte].up_chain; pentry; pentry = pentry->next_best) + { + if (pbest && pbest->seq < pentry->seq) + continue; + if (prefix_list_entry_match (pentry, p)) + pbest = pentry; + } + + if (validbits <= PLC_BITS) + break; + validbits -= PLC_BITS; + + if (--depth) + { + if (!table->entries[*byte].next_table) + break; + + table = table->entries[*byte].next_table; + byte++; + continue; + } + + for (pentry = table->entries[*byte].final_chain; pentry; pentry = pentry->next_best) + { + if (pbest && pbest->seq < pentry->seq) + continue; + if (prefix_list_entry_match (pentry, p)) + pbest = pentry; + } + break; } - return PREFIX_DENY; + if (pbest == NULL) + return PREFIX_DENY; + + return pbest->type; } static void __attribute__ ((unused)) |