summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorDavid Lamparter <equinox@opensourcerouting.org>2015-04-13 10:21:36 +0200
committerDonald Sharp <sharpd@cumulusnetworks.com>2015-11-03 14:54:14 +0100
commit7e111b6b0de44a9d351cdfeda4c674224a65ec3b (patch)
tree849687e00fff77c217f49ec3b0bd9f641c3207eb /lib
parentlib: straighten out ORF prefix list support (diff)
downloadfrr-7e111b6b0de44a9d351cdfeda4c674224a65ec3b.tar.xz
frr-7e111b6b0de44a9d351cdfeda4c674224a65ec3b.zip
lib: use trie structure for prefix list matching
Prefix lists were implemented with a simple linear list that is scanned sequentially. This is, of course, extremely inefficient as it scales by O(n). This patch adds a trie-ish data structure that allows quickly descending based on the prefix. Note that the trie structure used here is designed for real-world use, hence it uses a relatively crude fixed-size bytewise table instead of some fancy balancing scheme. It is quite cacheline efficient. Using real-world routeserver prefix lists, matching against a fulltable dump: entries before after factor 9103 63.8s .0124s 5142x 772 4.52s .0101s 445.3x 86 .445s .0098s 45.51x 7 .0379s .0099s 3.834x 2 .0136s .0095s 1.440x 1 .0084s .0095s .879x This buys CPU with memory. Memory usage on an IXP setup with 100k prefix list entries is an additional 4 MB on top of the 9.5 MB that it was before.
Diffstat (limited to 'lib')
-rw-r--r--lib/memtypes.c1
-rw-r--r--lib/plist.c220
2 files changed, 206 insertions, 15 deletions
diff --git a/lib/memtypes.c b/lib/memtypes.c
index 828820f30..c63178b6d 100644
--- a/lib/memtypes.c
+++ b/lib/memtypes.c
@@ -50,6 +50,7 @@ struct memory_list memory_list_lib[] =
{ MTYPE_PREFIX_LIST, "Prefix List" },
{ MTYPE_PREFIX_LIST_ENTRY, "Prefix List Entry" },
{ MTYPE_PREFIX_LIST_STR, "Prefix List Str" },
+ { MTYPE_PREFIX_LIST_TRIE, "Prefix List Trie Table" },
{ MTYPE_ROUTE_MAP, "Route map" },
{ MTYPE_ROUTE_MAP_NAME, "Route map name" },
{ MTYPE_ROUTE_MAP_INDEX, "Route map index" },
diff --git a/lib/plist.c b/lib/plist.c
index 7a356dffb..2fdb0b33e 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -34,6 +34,26 @@
#include "plist_int.h"
+/* not currently changeable, code assumes bytes further down */
+#define PLC_BITS 8
+#define PLC_LEN (1 << PLC_BITS)
+#define PLC_MAXLEVELV4 2 /* /24 for IPv4 */
+#define PLC_MAXLEVELV6 4 /* /48 for IPv6 */
+#define PLC_MAXLEVEL 4 /* max(v4,v6) */
+
+struct pltrie_entry {
+ union {
+ struct pltrie_table *next_table;
+ struct prefix_list_entry *final_chain;
+ };
+
+ struct prefix_list_entry *up_chain;
+};
+
+struct pltrie_table {
+ struct pltrie_entry entries[PLC_LEN];
+};
+
/* List of struct prefix_list. */
struct prefix_list_list
{
@@ -61,6 +81,9 @@ struct prefix_master
/* Hook function which is executed when prefix_list is deleted. */
void (*delete_hook) (struct prefix_list *);
+
+ /* number of bytes that have a trie level */
+ size_t trie_depth;
};
/* Static structure of IPv4 prefix_list's master. */
@@ -71,6 +94,8 @@ static struct prefix_master prefix_master_ipv4 =
1,
NULL,
NULL,
+ NULL,
+ PLC_MAXLEVELV4,
};
#ifdef HAVE_IPV6
@@ -82,27 +107,33 @@ static struct prefix_master prefix_master_ipv6 =
1,
NULL,
NULL,
+ NULL,
+ PLC_MAXLEVELV6,
};
#endif /* HAVE_IPV6*/
/* Static structure of BGP ORF prefix_list's master. */
-static struct prefix_master prefix_master_orf_v4 =
-{
+static struct prefix_master prefix_master_orf_v4 =
+{
{NULL, NULL},
{NULL, NULL},
1,
NULL,
NULL,
+ NULL,
+ PLC_MAXLEVELV4,
};
/* Static structure of BGP ORF prefix_list's master. */
-static struct prefix_master prefix_master_orf_v6 =
-{
+static struct prefix_master prefix_master_orf_v6 =
+{
{NULL, NULL},
{NULL, NULL},
1,
NULL,
NULL,
+ NULL,
+ PLC_MAXLEVELV6,
};
static struct prefix_master *
@@ -207,6 +238,7 @@ prefix_list_insert (afi_t afi, int orf, const char *name)
plist = prefix_list_new ();
plist->name = XSTRDUP (MTYPE_PREFIX_LIST_STR, name);
plist->master = master;
+ plist->trie = XCALLOC (MTYPE_PREFIX_LIST_TRIE, sizeof (struct pltrie_table));
/* If name is made by all digit character. We treat it as
number. */
@@ -340,6 +372,8 @@ prefix_list_delete (struct prefix_list *plist)
if (plist->name)
XFREE (MTYPE_PREFIX_LIST_STR, plist->name);
+ XFREE (MTYPE_PREFIX_LIST_TRIE, plist->trie);
+
prefix_list_free (plist);
}
@@ -441,10 +475,87 @@ prefix_list_entry_lookup (struct prefix_list *plist, struct prefix *prefix,
}
static void
+trie_walk_affected (size_t validbits, struct pltrie_table *table, uint8_t byte,
+ struct prefix_list_entry *object,
+ void (*fn)(struct prefix_list_entry *object,
+ struct prefix_list_entry **updptr))
+{
+ uint8_t mask;
+ uint16_t bwalk;
+
+ if (validbits > PLC_BITS)
+ {
+ fn (object, &table->entries[byte].final_chain);
+ return;
+ }
+
+ mask = (1 << (8 - validbits)) - 1;
+ for (bwalk = byte & ~mask; bwalk <= byte + mask; bwalk++)
+ {
+ fn (object, &table->entries[bwalk].up_chain);
+ }
+}
+
+static void trie_uninstall_fn (struct prefix_list_entry *object,
+ struct prefix_list_entry **updptr)
+{
+ for (; *updptr; updptr = &(*updptr)->next_best)
+ if (*updptr == object)
+ {
+ *updptr = object->next_best;
+ break;
+ }
+}
+
+static int
+trie_table_empty (struct pltrie_table *table)
+{
+ size_t i;
+ for (i = 0; i < PLC_LEN; i++)
+ if (table->entries[i].next_table || table->entries[i].final_chain)
+ return 0;
+ return 1;
+}
+
+static void
+prefix_list_trie_del (struct prefix_list *plist,
+ struct prefix_list_entry *pentry)
+{
+ size_t depth, maxdepth = plist->master->trie_depth;
+ uint8_t *bytes = &pentry->prefix.u.prefix;
+ size_t validbits = pentry->prefix.prefixlen;
+ struct pltrie_table *table, **tables[PLC_MAXLEVEL];
+
+ table = plist->trie;
+ for (depth = 0; validbits > PLC_BITS && depth < maxdepth - 1; depth++)
+ {
+ uint8_t byte = bytes[depth];
+ assert (table->entries[byte].next_table);
+
+ tables[depth + 1] = &table->entries[byte].next_table;
+ table = table->entries[byte].next_table;
+
+ validbits -= PLC_BITS;
+ }
+
+ trie_walk_affected (validbits, table, bytes[depth], pentry, trie_uninstall_fn);
+
+ for (; depth > 0; depth--)
+ if (trie_table_empty (*tables[depth]))
+ {
+ XFREE (MTYPE_PREFIX_LIST_TRIE, *tables[depth]);
+ *tables[depth] = NULL;
+ }
+}
+
+
+static void
prefix_list_entry_delete (struct prefix_list *plist,
struct prefix_list_entry *pentry,
int update_list)
{
+ prefix_list_trie_del (plist, pentry);
+
if (plist == NULL || pentry == NULL)
return;
if (pentry->prev)
@@ -473,6 +584,52 @@ prefix_list_entry_delete (struct prefix_list *plist,
}
}
+static void trie_install_fn (struct prefix_list_entry *object,
+ struct prefix_list_entry **updptr)
+{
+ while (*updptr)
+ {
+ if (*updptr == object)
+ return;
+ if ((*updptr)->prefix.prefixlen < object->prefix.prefixlen)
+ break;
+ if ((*updptr)->seq > object->seq)
+ break;
+ updptr = &(*updptr)->next_best;
+ }
+
+ if (!object->next_best)
+ object->next_best = *updptr;
+ else
+ assert (object->next_best == *updptr || !*updptr);
+
+ *updptr = object;
+}
+
+static void
+prefix_list_trie_add (struct prefix_list *plist,
+ struct prefix_list_entry *pentry)
+{
+ size_t depth = plist->master->trie_depth;
+ uint8_t *bytes = &pentry->prefix.u.prefix;
+ size_t validbits = pentry->prefix.prefixlen;
+ struct pltrie_table *table;
+
+ table = plist->trie;
+ while (validbits > PLC_BITS && depth > 1)
+ {
+ if (!table->entries[*bytes].next_table)
+ table->entries[*bytes].next_table = XCALLOC (MTYPE_PREFIX_LIST_TRIE,
+ sizeof(struct pltrie_table));
+ table = table->entries[*bytes].next_table;
+ bytes++;
+ depth--;
+ validbits -= PLC_BITS;
+ }
+
+ trie_walk_affected (validbits, table, *bytes, pentry, trie_install_fn);
+}
+
static void
prefix_list_entry_add (struct prefix_list *plist,
struct prefix_list_entry *pentry)
@@ -518,6 +675,8 @@ prefix_list_entry_add (struct prefix_list *plist,
plist->tail = pentry;
}
+ prefix_list_trie_add (plist, pentry);
+
/* Increment count. */
plist->count++;
@@ -575,10 +734,13 @@ prefix_list_entry_match (struct prefix_list_entry *pentry, struct prefix *p)
enum prefix_list_type
prefix_list_apply (struct prefix_list *plist, void *object)
{
- struct prefix_list_entry *pentry;
- struct prefix *p;
+ struct prefix_list_entry *pentry, *pbest = NULL;
- p = (struct prefix *) object;
+ struct prefix *p = (struct prefix *) object;
+ uint8_t *byte = &p->u.prefix;
+ size_t depth = plist->master->trie_depth;
+ size_t validbits = p->prefixlen;
+ struct pltrie_table *table;
if (plist == NULL)
return PREFIX_DENY;
@@ -586,17 +748,45 @@ prefix_list_apply (struct prefix_list *plist, void *object)
if (plist->count == 0)
return PREFIX_PERMIT;
- for (pentry = plist->head; pentry; pentry = pentry->next)
+ table = plist->trie;
+ while (1)
{
- pentry->refcnt++;
- if (prefix_list_entry_match (pentry, p))
- {
- pentry->hitcnt++;
- return pentry->type;
- }
+ for (pentry = table->entries[*byte].up_chain; pentry; pentry = pentry->next_best)
+ {
+ if (pbest && pbest->seq < pentry->seq)
+ continue;
+ if (prefix_list_entry_match (pentry, p))
+ pbest = pentry;
+ }
+
+ if (validbits <= PLC_BITS)
+ break;
+ validbits -= PLC_BITS;
+
+ if (--depth)
+ {
+ if (!table->entries[*byte].next_table)
+ break;
+
+ table = table->entries[*byte].next_table;
+ byte++;
+ continue;
+ }
+
+ for (pentry = table->entries[*byte].final_chain; pentry; pentry = pentry->next_best)
+ {
+ if (pbest && pbest->seq < pentry->seq)
+ continue;
+ if (prefix_list_entry_match (pentry, p))
+ pbest = pentry;
+ }
+ break;
}
- return PREFIX_DENY;
+ if (pbest == NULL)
+ return PREFIX_DENY;
+
+ return pbest->type;
}
static void __attribute__ ((unused))