summaryrefslogtreecommitdiffstats
path: root/net/openvswitch/flow_table.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-11 05:01:30 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-11 05:01:30 +0100
commitc5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb (patch)
tree9830baf38832769e1cf621708889111bbe3c93df /net/openvswitch/flow_table.c
parentMerge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jik... (diff)
parentcrypto: fix af_alg_make_sg() conversion to iov_iter (diff)
downloadlinux-c5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb.tar.xz
linux-c5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) More iov_iter conversion work from Al Viro. [ The "crypto: switch af_alg_make_sg() to iov_iter" commit was wrong, and this pull actually adds an extra commit on top of the branch I'm pulling to fix that up, so that the pre-merge state is ok. - Linus ] 2) Various optimizations to the ipv4 forwarding information base trie lookup implementation. From Alexander Duyck. 3) Remove sock_iocb altogether, from CHristoph Hellwig. 4) Allow congestion control algorithm selection via routing metrics. From Daniel Borkmann. 5) Make ipv4 uncached route list per-cpu, from Eric Dumazet. 6) Handle rfs hash collisions more gracefully, also from Eric Dumazet. 7) Add xmit_more support to r8169, e1000, and e1000e drivers. From Florian Westphal. 8) Transparent Ethernet Bridging support for GRO, from Jesse Gross. 9) Add BPF packet actions to packet scheduler, from Jiri Pirko. 10) Add support for uniqu flow IDs to openvswitch, from Joe Stringer. 11) New NetCP ethernet driver, from Muralidharan Karicheri and Wingman Kwok. 12) More sanely handle out-of-window dupacks, which can result in serious ACK storms. From Neal Cardwell. 13) Various rhashtable bug fixes and enhancements, from Herbert Xu, Patrick McHardy, and Thomas Graf. 14) Support xmit_more in be2net, from Sathya Perla. 15) Group Policy extensions for vxlan, from Thomas Graf. 16) Remove Checksum Offload support for vxlan, from Tom Herbert. 17) Like ipv4, support lockless transmit over ipv6 UDP sockets. From Vlad Yasevich. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1494+1 commits) crypto: fix af_alg_make_sg() conversion to iov_iter ipv4: Namespecify TCP PMTU mechanism i40e: Fix for stats init function call in Rx setup tcp: don't include Fast Open option in SYN-ACK on pure SYN-data openvswitch: Only set TUNNEL_VXLAN_OPT if VXLAN-GBP metadata is set ipv6: Make __ipv6_select_ident static ipv6: Fix fragment id assignment on LE arches. bridge: Fix inability to add non-vlan fdb entry net: Mellanox: Delete unnecessary checks before the function call "vunmap" cxgb4: Add support in cxgb4 to get expansion rom version via ethtool ethtool: rename reserved1 memeber in ethtool_drvinfo for expansion ROM version net: dsa: Remove redundant phy_attach() IB/mlx4: Reset flow support for IB kernel ULPs IB/mlx4: Always use the correct port for mirrored multicast attachments net/bonding: Fix potential bad memory access during bonding events tipc: remove tipc_snprintf tipc: nl compat add noop and remove legacy nl framework tipc: convert legacy nl stats show to nl compat tipc: convert legacy nl net id get to nl compat tipc: convert legacy nl net id set to nl compat ...
Diffstat (limited to 'net/openvswitch/flow_table.c')
-rw-r--r--net/openvswitch/flow_table.c228
1 files changed, 178 insertions, 50 deletions
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 5899bf161c61..4613df8c8290 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -85,6 +85,8 @@ struct sw_flow *ovs_flow_alloc(void)
flow->sf_acts = NULL;
flow->mask = NULL;
+ flow->id.unmasked_key = NULL;
+ flow->id.ufid_len = 0;
flow->stats_last_writer = NUMA_NO_NODE;
/* Initialize the default stat node. */
@@ -139,6 +141,8 @@ static void flow_free(struct sw_flow *flow)
{
int node;
+ if (ovs_identifier_is_key(&flow->id))
+ kfree(flow->id.unmasked_key);
kfree((struct sw_flow_actions __force *)flow->sf_acts);
for_each_node(node)
if (flow->stats[node])
@@ -200,18 +204,28 @@ static struct table_instance *table_instance_alloc(int new_size)
int ovs_flow_tbl_init(struct flow_table *table)
{
- struct table_instance *ti;
+ struct table_instance *ti, *ufid_ti;
ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ti)
return -ENOMEM;
+ ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+ if (!ufid_ti)
+ goto free_ti;
+
rcu_assign_pointer(table->ti, ti);
+ rcu_assign_pointer(table->ufid_ti, ufid_ti);
INIT_LIST_HEAD(&table->mask_list);
table->last_rehash = jiffies;
table->count = 0;
+ table->ufid_count = 0;
return 0;
+
+free_ti:
+ __table_instance_destroy(ti);
+ return -ENOMEM;
}
static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
@@ -221,13 +235,16 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
__table_instance_destroy(ti);
}
-static void table_instance_destroy(struct table_instance *ti, bool deferred)
+static void table_instance_destroy(struct table_instance *ti,
+ struct table_instance *ufid_ti,
+ bool deferred)
{
int i;
if (!ti)
return;
+ BUG_ON(!ufid_ti);
if (ti->keep_flows)
goto skip_flows;
@@ -236,18 +253,24 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred)
struct hlist_head *head = flex_array_get(ti->buckets, i);
struct hlist_node *n;
int ver = ti->node_ver;
+ int ufid_ver = ufid_ti->node_ver;
- hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
- hlist_del_rcu(&flow->hash_node[ver]);
+ hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
+ hlist_del_rcu(&flow->flow_table.node[ver]);
+ if (ovs_identifier_is_ufid(&flow->id))
+ hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
ovs_flow_free(flow, deferred);
}
}
skip_flows:
- if (deferred)
+ if (deferred) {
call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
- else
+ call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
+ } else {
__table_instance_destroy(ti);
+ __table_instance_destroy(ufid_ti);
+ }
}
/* No need for locking this function is called from RCU callback or
@@ -256,8 +279,9 @@ skip_flows:
void ovs_flow_tbl_destroy(struct flow_table *table)
{
struct table_instance *ti = rcu_dereference_raw(table->ti);
+ struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
- table_instance_destroy(ti, false);
+ table_instance_destroy(ti, ufid_ti, false);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -272,7 +296,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
while (*bucket < ti->n_buckets) {
i = 0;
head = flex_array_get(ti->buckets, *bucket);
- hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
+ hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
if (i < *last) {
i++;
continue;
@@ -294,16 +318,26 @@ static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
(hash & (ti->n_buckets - 1)));
}
-static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow)
+static void table_instance_insert(struct table_instance *ti,
+ struct sw_flow *flow)
+{
+ struct hlist_head *head;
+
+ head = find_bucket(ti, flow->flow_table.hash);
+ hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head);
+}
+
+static void ufid_table_instance_insert(struct table_instance *ti,
+ struct sw_flow *flow)
{
struct hlist_head *head;
- head = find_bucket(ti, flow->hash);
- hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head);
+ head = find_bucket(ti, flow->ufid_table.hash);
+ hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);
}
static void flow_table_copy_flows(struct table_instance *old,
- struct table_instance *new)
+ struct table_instance *new, bool ufid)
{
int old_ver;
int i;
@@ -318,15 +352,21 @@ static void flow_table_copy_flows(struct table_instance *old,
head = flex_array_get(old->buckets, i);
- hlist_for_each_entry(flow, head, hash_node[old_ver])
- table_instance_insert(new, flow);
+ if (ufid)
+ hlist_for_each_entry(flow, head,
+ ufid_table.node[old_ver])
+ ufid_table_instance_insert(new, flow);
+ else
+ hlist_for_each_entry(flow, head,
+ flow_table.node[old_ver])
+ table_instance_insert(new, flow);
}
old->keep_flows = true;
}
static struct table_instance *table_instance_rehash(struct table_instance *ti,
- int n_buckets)
+ int n_buckets, bool ufid)
{
struct table_instance *new_ti;
@@ -334,32 +374,45 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti,
if (!new_ti)
return NULL;
- flow_table_copy_flows(ti, new_ti);
+ flow_table_copy_flows(ti, new_ti, ufid);
return new_ti;
}
int ovs_flow_tbl_flush(struct flow_table *flow_table)
{
- struct table_instance *old_ti;
- struct table_instance *new_ti;
+ struct table_instance *old_ti, *new_ti;
+ struct table_instance *old_ufid_ti, *new_ufid_ti;
- old_ti = ovsl_dereference(flow_table->ti);
new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!new_ti)
return -ENOMEM;
+ new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+ if (!new_ufid_ti)
+ goto err_free_ti;
+
+ old_ti = ovsl_dereference(flow_table->ti);
+ old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
rcu_assign_pointer(flow_table->ti, new_ti);
+ rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
flow_table->last_rehash = jiffies;
flow_table->count = 0;
+ flow_table->ufid_count = 0;
- table_instance_destroy(old_ti, true);
+ table_instance_destroy(old_ti, old_ufid_ti, true);
return 0;
+
+err_free_ti:
+ __table_instance_destroy(new_ti);
+ return -ENOMEM;
}
-static u32 flow_hash(const struct sw_flow_key *key, int key_start,
- int key_end)
+static u32 flow_hash(const struct sw_flow_key *key,
+ const struct sw_flow_key_range *range)
{
+ int key_start = range->start;
+ int key_end = range->end;
const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
int hash_u32s = (key_end - key_start) >> 2;
@@ -395,19 +448,20 @@ static bool cmp_key(const struct sw_flow_key *key1,
static bool flow_cmp_masked_key(const struct sw_flow *flow,
const struct sw_flow_key *key,
- int key_start, int key_end)
+ const struct sw_flow_key_range *range)
{
- return cmp_key(&flow->key, key, key_start, key_end);
+ return cmp_key(&flow->key, key, range->start, range->end);
}
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
- const struct sw_flow_match *match)
+static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+ const struct sw_flow_match *match)
{
struct sw_flow_key *key = match->key;
int key_start = flow_key_start(key);
int key_end = match->range.end;
- return cmp_key(&flow->unmasked_key, key, key_start, key_end);
+ BUG_ON(ovs_identifier_is_ufid(&flow->id));
+ return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
}
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
@@ -416,18 +470,15 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
{
struct sw_flow *flow;
struct hlist_head *head;
- int key_start = mask->range.start;
- int key_end = mask->range.end;
u32 hash;
struct sw_flow_key masked_key;
ovs_flow_mask_key(&masked_key, unmasked, mask);
- hash = flow_hash(&masked_key, key_start, key_end);
+ hash = flow_hash(&masked_key, &mask->range);
head = find_bucket(ti, hash);
- hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
- if (flow->mask == mask && flow->hash == hash &&
- flow_cmp_masked_key(flow, &masked_key,
- key_start, key_end))
+ hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
+ if (flow->mask == mask && flow->flow_table.hash == hash &&
+ flow_cmp_masked_key(flow, &masked_key, &mask->range))
return flow;
}
return NULL;
@@ -469,7 +520,48 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
/* Always called under ovs-mutex. */
list_for_each_entry(mask, &tbl->mask_list, list) {
flow = masked_flow_lookup(ti, match->key, mask);
- if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */
+ if (flow && ovs_identifier_is_key(&flow->id) &&
+ ovs_flow_cmp_unmasked_key(flow, match))
+ return flow;
+ }
+ return NULL;
+}
+
+static u32 ufid_hash(const struct sw_flow_id *sfid)
+{
+ return jhash(sfid->ufid, sfid->ufid_len, 0);
+}
+
+static bool ovs_flow_cmp_ufid(const struct sw_flow *flow,
+ const struct sw_flow_id *sfid)
+{
+ if (flow->id.ufid_len != sfid->ufid_len)
+ return false;
+
+ return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len);
+}
+
+bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match)
+{
+ if (ovs_identifier_is_ufid(&flow->id))
+ return flow_cmp_masked_key(flow, match->key, &match->range);
+
+ return ovs_flow_cmp_unmasked_key(flow, match);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
+ const struct sw_flow_id *ufid)
+{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
+ struct sw_flow *flow;
+ struct hlist_head *head;
+ u32 hash;
+
+ hash = ufid_hash(ufid);
+ head = find_bucket(ti, hash);
+ hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) {
+ if (flow->ufid_table.hash == hash &&
+ ovs_flow_cmp_ufid(flow, ufid))
return flow;
}
return NULL;
@@ -486,9 +578,10 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table)
return num;
}
-static struct table_instance *table_instance_expand(struct table_instance *ti)
+static struct table_instance *table_instance_expand(struct table_instance *ti,
+ bool ufid)
{
- return table_instance_rehash(ti, ti->n_buckets * 2);
+ return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}
/* Remove 'mask' from the mask list, if it is not needed any more. */
@@ -513,10 +606,15 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *ti = ovsl_dereference(table->ti);
+ struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
BUG_ON(table->count == 0);
- hlist_del_rcu(&flow->hash_node[ti->node_ver]);
+ hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
table->count--;
+ if (ovs_identifier_is_ufid(&flow->id)) {
+ hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+ table->ufid_count--;
+ }
/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
* accessible as long as the RCU read lock is held.
@@ -585,34 +683,64 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
}
/* Must be called with OVS mutex held. */
-int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
- const struct sw_flow_mask *mask)
+static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *new_ti = NULL;
struct table_instance *ti;
- int err;
-
- err = flow_mask_insert(table, flow, mask);
- if (err)
- return err;
- flow->hash = flow_hash(&flow->key, flow->mask->range.start,
- flow->mask->range.end);
+ flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
ti = ovsl_dereference(table->ti);
table_instance_insert(ti, flow);
table->count++;
/* Expand table, if necessary, to make room. */
if (table->count > ti->n_buckets)
- new_ti = table_instance_expand(ti);
+ new_ti = table_instance_expand(ti, false);
else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
- new_ti = table_instance_rehash(ti, ti->n_buckets);
+ new_ti = table_instance_rehash(ti, ti->n_buckets, false);
if (new_ti) {
rcu_assign_pointer(table->ti, new_ti);
- table_instance_destroy(ti, true);
+ call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
table->last_rehash = jiffies;
}
+}
+
+/* Must be called with OVS mutex held. */
+static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
+{
+ struct table_instance *ti;
+
+ flow->ufid_table.hash = ufid_hash(&flow->id);
+ ti = ovsl_dereference(table->ufid_ti);
+ ufid_table_instance_insert(ti, flow);
+ table->ufid_count++;
+
+ /* Expand table, if necessary, to make room. */
+ if (table->ufid_count > ti->n_buckets) {
+ struct table_instance *new_ti;
+
+ new_ti = table_instance_expand(ti, true);
+ if (new_ti) {
+ rcu_assign_pointer(table->ufid_ti, new_ti);
+ call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
+ }
+ }
+}
+
+/* Must be called with OVS mutex held. */
+int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+ const struct sw_flow_mask *mask)
+{
+ int err;
+
+ err = flow_mask_insert(table, flow, mask);
+ if (err)
+ return err;
+ flow_key_insert(table, flow);
+ if (ovs_identifier_is_ufid(&flow->id))
+ flow_ufid_insert(table, flow);
+
return 0;
}