summaryrefslogtreecommitdiffstats
path: root/net/ipv4/fib_trie.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/fib_trie.c')
-rw-r--r--net/ipv4/fib_trie.c256
1 files changed, 206 insertions, 50 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 0671569ee6f0..4be234c7d8c3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#define VERSION "0.323"
+#define VERSION "0.325"
#include <linux/config.h>
#include <asm/uaccess.h>
@@ -136,6 +136,7 @@ struct trie_use_stats {
unsigned int semantic_match_passed;
unsigned int semantic_match_miss;
unsigned int null_node_hit;
+ unsigned int resize_node_skipped;
};
#endif
@@ -164,8 +165,8 @@ static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
static int tnode_child_length(struct tnode *tn);
static struct node *resize(struct trie *t, struct tnode *tn);
-static struct tnode *inflate(struct trie *t, struct tnode *tn);
-static struct tnode *halve(struct trie *t, struct tnode *tn);
+static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err);
+static struct tnode *halve(struct trie *t, struct tnode *tn, int *err);
static void tnode_free(struct tnode *tn);
static void trie_dump_seq(struct seq_file *seq, struct trie *t);
extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
@@ -341,8 +342,10 @@ static struct leaf *leaf_new(void)
static struct leaf_info *leaf_info_new(int plen)
{
struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
- li->plen = plen;
- INIT_LIST_HEAD(&li->falh);
+ if(li) {
+ li->plen = plen;
+ INIT_LIST_HEAD(&li->falh);
+ }
return li;
}
@@ -356,11 +359,32 @@ static inline void free_leaf_info(struct leaf_info *li)
kfree(li);
}
+static struct tnode *tnode_alloc(unsigned int size)
+{
+ if (size <= PAGE_SIZE) {
+ return kmalloc(size, GFP_KERNEL);
+ } else {
+ return (struct tnode *)
+ __get_free_pages(GFP_KERNEL, get_order(size));
+ }
+}
+
+static void __tnode_free(struct tnode *tn)
+{
+ unsigned int size = sizeof(struct tnode) +
+ (1<<tn->bits) * sizeof(struct node *);
+
+ if (size <= PAGE_SIZE)
+ kfree(tn);
+ else
+ free_pages((unsigned long)tn, get_order(size));
+}
+
static struct tnode* tnode_new(t_key key, int pos, int bits)
{
int nchildren = 1<<bits;
int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
- struct tnode *tn = kmalloc(sz, GFP_KERNEL);
+ struct tnode *tn = tnode_alloc(sz);
if(tn) {
memset(tn, 0, sz);
@@ -388,7 +412,7 @@ static void tnode_free(struct tnode *tn)
printk("FL %p \n", tn);
}
else if(IS_TNODE(tn)) {
- kfree(tn);
+ __tnode_free(tn);
if(trie_debug > 0 )
printk("FT %p \n", tn);
}
@@ -458,6 +482,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w
static struct node *resize(struct trie *t, struct tnode *tn)
{
int i;
+ int err = 0;
if (!tn)
return NULL;
@@ -554,12 +579,20 @@ static struct node *resize(struct trie *t, struct tnode *tn)
*/
check_tnode(tn);
-
+
+ err = 0;
while ((tn->full_children > 0 &&
50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
inflate_threshold * tnode_child_length(tn))) {
- tn = inflate(t, tn);
+ tn = inflate(t, tn, &err);
+
+ if(err) {
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+ t->stats.resize_node_skipped++;
+#endif
+ break;
+ }
}
check_tnode(tn);
@@ -568,11 +601,22 @@ static struct node *resize(struct trie *t, struct tnode *tn)
* Halve as long as the number of empty children in this
* node is above threshold.
*/
+
+ err = 0;
while (tn->bits > 1 &&
100 * (tnode_child_length(tn) - tn->empty_children) <
- halve_threshold * tnode_child_length(tn))
+ halve_threshold * tnode_child_length(tn)) {
+
+ tn = halve(t, tn, &err);
+
+ if(err) {
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+ t->stats.resize_node_skipped++;
+#endif
+ break;
+ }
+ }
- tn = halve(t, tn);
/* Only one child remains */
@@ -597,7 +641,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
return (struct node *) tn;
}
-static struct tnode *inflate(struct trie *t, struct tnode *tn)
+static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
{
struct tnode *inode;
struct tnode *oldtnode = tn;
@@ -609,8 +653,63 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
- if (!tn)
- trie_bug("tnode_new failed");
+ if (!tn) {
+ *err = -ENOMEM;
+ return oldtnode;
+ }
+
+ /*
+ * Preallocate and store tnodes before the actual work so we
+ * don't get into an inconsistent state if memory allocation
+ * fails. In case of failure we return the oldnode and inflate
+ * of tnode is ignored.
+ */
+
+ for(i = 0; i < olen; i++) {
+ struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
+
+ if (inode &&
+ IS_TNODE(inode) &&
+ inode->pos == oldtnode->pos + oldtnode->bits &&
+ inode->bits > 1) {
+ struct tnode *left, *right;
+
+ t_key m = TKEY_GET_MASK(inode->pos, 1);
+
+ left = tnode_new(inode->key&(~m), inode->pos + 1,
+ inode->bits - 1);
+
+ if(!left) {
+ *err = -ENOMEM;
+ break;
+ }
+
+ right = tnode_new(inode->key|m, inode->pos + 1,
+ inode->bits - 1);
+
+ if(!right) {
+ *err = -ENOMEM;
+ break;
+ }
+
+ put_child(t, tn, 2*i, (struct node *) left);
+ put_child(t, tn, 2*i+1, (struct node *) right);
+ }
+ }
+
+ if(*err) {
+ int size = tnode_child_length(tn);
+ int j;
+
+ for(j = 0; j < size; j++)
+ if( tn->child[j])
+ tnode_free((struct tnode *)tn->child[j]);
+
+ tnode_free(tn);
+
+ *err = -ENOMEM;
+ return oldtnode;
+ }
for(i = 0; i < olen; i++) {
struct node *node = tnode_get_child(oldtnode, i);
@@ -623,7 +722,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
if(IS_LEAF(node) || ((struct tnode *) node)->pos >
tn->pos + tn->bits - 1) {
- if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1,
+ if(tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
1) == 0)
put_child(t, tn, 2*i, node);
else
@@ -663,27 +762,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
* the position (inode->pos)
*/
- t_key m = TKEY_GET_MASK(inode->pos, 1);
-
/* Use the old key, but set the new significant
* bit to zero.
*/
- left = tnode_new(inode->key&(~m), inode->pos + 1,
- inode->bits - 1);
- if(!left)
- trie_bug("tnode_new failed");
-
-
- /* Use the old key, but set the new significant
- * bit to one.
- */
- right = tnode_new(inode->key|m, inode->pos + 1,
- inode->bits - 1);
+ left = (struct tnode *) tnode_get_child(tn, 2*i);
+ put_child(t, tn, 2*i, NULL);
+
+ if(!left)
+ BUG();
+
+ right = (struct tnode *) tnode_get_child(tn, 2*i+1);
+ put_child(t, tn, 2*i+1, NULL);
+
+ if(!right)
+ BUG();
- if(!right)
- trie_bug("tnode_new failed");
-
size = tnode_child_length(left);
for(j = 0; j < size; j++) {
put_child(t, left, j, inode->child[j]);
@@ -699,7 +793,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
return tn;
}
-static struct tnode *halve(struct trie *t, struct tnode *tn)
+static struct tnode *halve(struct trie *t, struct tnode *tn, int *err)
{
struct tnode *oldtnode = tn;
struct node *left, *right;
@@ -710,8 +804,48 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
- if(!tn)
- trie_bug("tnode_new failed");
+ if (!tn) {
+ *err = -ENOMEM;
+ return oldtnode;
+ }
+
+ /*
+ * Preallocate and store tnodes before the actual work so we
+ * don't get into an inconsistent state if memory allocation
+ * fails. In case of failure we return the oldnode and halve
+ * of tnode is ignored.
+ */
+
+ for(i = 0; i < olen; i += 2) {
+ left = tnode_get_child(oldtnode, i);
+ right = tnode_get_child(oldtnode, i+1);
+
+ /* Two nonempty children */
+ if( left && right) {
+ struct tnode *newBinNode =
+ tnode_new(left->key, tn->pos + tn->bits, 1);
+
+ if(!newBinNode) {
+ *err = -ENOMEM;
+ break;
+ }
+ put_child(t, tn, i/2, (struct node *)newBinNode);
+ }
+ }
+
+ if(*err) {
+ int size = tnode_child_length(tn);
+ int j;
+
+ for(j = 0; j < size; j++)
+ if( tn->child[j])
+ tnode_free((struct tnode *)tn->child[j]);
+
+ tnode_free(tn);
+
+ *err = -ENOMEM;
+ return oldtnode;
+ }
for(i = 0; i < olen; i += 2) {
left = tnode_get_child(oldtnode, i);
@@ -728,10 +862,11 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
/* Two nonempty children */
else {
struct tnode *newBinNode =
- tnode_new(left->key, tn->pos + tn->bits, 1);
+ (struct tnode *) tnode_get_child(tn, i/2);
+ put_child(t, tn, i/2, NULL);
if(!newBinNode)
- trie_bug("tnode_new failed");
+ BUG();
put_child(t, newBinNode, 0, left);
put_child(t, newBinNode, 1, right);
@@ -879,8 +1014,8 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
return (struct node*) tn;
}
-static struct list_head *
-fib_insert_node(struct trie *t, u32 key, int plen)
+static struct list_head *
+fib_insert_node(struct trie *t, int *err, u32 key, int plen)
{
int pos, newpos;
struct tnode *tp = NULL, *tn = NULL;
@@ -940,7 +1075,6 @@ fib_insert_node(struct trie *t, u32 key, int plen)
if(tp && IS_LEAF(tp))
BUG();
- t->revision++;
/* Case 1: n is a leaf. Compare prefixes */
@@ -949,8 +1083,10 @@ fib_insert_node(struct trie *t, u32 key, int plen)
li = leaf_info_new(plen);
- if(! li)
- BUG();
+ if(! li) {
+ *err = -ENOMEM;
+ goto err;
+ }
fa_head = &li->falh;
insert_leaf_info(&l->list, li);
@@ -959,14 +1095,19 @@ fib_insert_node(struct trie *t, u32 key, int plen)
t->size++;
l = leaf_new();
- if(! l)
- BUG();
+ if(! l) {
+ *err = -ENOMEM;
+ goto err;
+ }
l->key = key;
li = leaf_info_new(plen);
- if(! li)
- BUG();
+ if(! li) {
+ tnode_free((struct tnode *) l);
+ *err = -ENOMEM;
+ goto err;
+ }
fa_head = &li->falh;
insert_leaf_info(&l->list, li);
@@ -1003,9 +1144,14 @@ fib_insert_node(struct trie *t, u32 key, int plen)
newpos = 0;
tn = tnode_new(key, newpos, 1); /* First tnode */
}
- if(!tn)
- trie_bug("tnode_pfx_new failed");
+ if(!tn) {
+ free_leaf_info(li);
+ tnode_free((struct tnode *) l);
+ *err = -ENOMEM;
+ goto err;
+ }
+
NODE_SET_PARENT(tn, tp);
missbit=tkey_extract_bits(key, newpos, 1);
@@ -1027,7 +1173,9 @@ fib_insert_node(struct trie *t, u32 key, int plen)
}
/* Rebalance the trie */
t->trie = trie_rebalance(t, tp);
-done:;
+done:
+ t->revision++;
+err:;
return fa_head;
}
@@ -1156,8 +1304,12 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
* Insert new entry to the list.
*/
- if(!fa_head)
- fa_head = fib_insert_node(t, key, plen);
+ if(!fa_head) {
+ fa_head = fib_insert_node(t, &err, key, plen);
+ err = 0;
+ if(err)
+ goto out_free_new_fa;
+ }
write_lock_bh(&fib_lock);
@@ -1170,6 +1322,9 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
succeeded:
return 0;
+
+out_free_new_fa:
+ kmem_cache_free(fn_alias_kmem, new_fa);
out:
fib_release_info(fi);
err:;
@@ -2279,6 +2434,7 @@ static void collect_and_show(struct trie *t, struct seq_file *seq)
seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
+ seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
#ifdef CLEAR_STATS
memset(&(t->stats), 0, sizeof(t->stats));
#endif