diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-13 03:07:07 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-13 03:07:07 +0100 |
commit | 6be35c700f742e911ecedd07fcc43d4439922334 (patch) | |
tree | ca9f37214d204465fcc2d79c82efd291e357c53c /drivers/net/bonding | |
parent | Merge tag 'for-linus-20121212' of git://git.kernel.org/pub/scm/linux/kernel/g... (diff) | |
parent | net/mlx4_en: Add support for destination MAC in steering rules (diff) | |
download | linux-6be35c700f742e911ecedd07fcc43d4439922334.tar.xz linux-6be35c700f742e911ecedd07fcc43d4439922334.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller:
1) Allow to dump, monitor, and change the bridge multicast database
using netlink. From Cong Wang.
2) RFC 5961 TCP blind data injection attack mitigation, from Eric
Dumazet.
3) Networking user namespace support from Eric W. Biederman.
4) tuntap/virtio-net multiqueue support by Jason Wang.
5) Support for checksum offload of encapsulated packets (basically,
tunneled traffic can still be checksummed by HW). From Joseph
Gasparakis.
6) Allow BPF filter access to VLAN tags, from Eric Dumazet and
Daniel Borkmann.
7) Bridge port parameters over netlink and BPDU blocking support
from Stephen Hemminger.
8) Improve data access patterns during inet socket demux by rearranging
socket layout, from Eric Dumazet.
9) TIPC protocol updates and cleanups from Ying Xue, Paul Gortmaker, and
Jon Maloy.
10) Update TCP socket hash sizing to be more in line with current day
realities. The existing heurstics were choosen a decade ago.
From Eric Dumazet.
11) Fix races, queue bloat, and excessive wakeups in ATM and
associated drivers, from Krzysztof Mazur and David Woodhouse.
12) Support DOVE (Distributed Overlay Virtual Ethernet) extensions
in VXLAN driver, from David Stevens.
13) Add "oops_only" mode to netconsole, from Amerigo Wang.
14) Support set and query of VEB/VEPA bridge mode via PF_BRIDGE, also
allow DCB netlink to work on namespaces other than the initial
namespace. From John Fastabend.
15) Support PTP in the Tigon3 driver, from Matt Carlson.
16) tun/vhost zero copy fixes and improvements, plus turn it on
by default, from Michael S. Tsirkin.
17) Support per-association statistics in SCTP, from Michele
Baldessari.
And many, many, driver updates, cleanups, and improvements. Too
numerous to mention individually.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1722 commits)
net/mlx4_en: Add support for destination MAC in steering rules
net/mlx4_en: Use generic etherdevice.h functions.
net: ethtool: Add destination MAC address to flow steering API
bridge: add support of adding and deleting mdb entries
bridge: notify mdb changes via netlink
ndisc: Unexport ndisc_{build,send}_skb().
uapi: add missing netconf.h to export list
pkt_sched: avoid requeues if possible
solos-pci: fix double-free of TX skb in DMA mode
bnx2: Fix accidental reversions.
bna: Driver Version Updated to 3.1.2.1
bna: Firmware update
bna: Add RX State
bna: Rx Page Based Allocation
bna: TX Intr Coalescing Fix
bna: Tx and Rx Optimizations
bna: Code Cleanup and Enhancements
ath9k: check pdata variable before dereferencing it
ath5k: RX timestamp is reported at end of frame
ath9k_htc: RX timestamp is reported at end of frame
...
Diffstat (limited to 'drivers/net/bonding')
-rw-r--r-- | drivers/net/bonding/bond_alb.c | 197 | ||||
-rw-r--r-- | drivers/net/bonding/bond_alb.h | 28 | ||||
-rw-r--r-- | drivers/net/bonding/bond_debugfs.c | 5 | ||||
-rw-r--r-- | drivers/net/bonding/bond_main.c | 19 | ||||
-rw-r--r-- | drivers/net/bonding/bonding.h | 13 |
5 files changed, 210 insertions, 52 deletions
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index e15cc11edbbe..7c9d136e74be 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -84,6 +84,10 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) /* Forward declaration */ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]); +static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp); +static void rlb_src_unlink(struct bonding *bond, u32 index); +static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, + u32 ip_dst_hash); static inline u8 _simple_hash(const u8 *hash_start, int hash_size) { @@ -354,6 +358,18 @@ static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, if (!arp) goto out; + /* We received an ARP from arp->ip_src. + * We might have used this IP address previously (on the bonding host + * itself or on a system that is bridged together with the bond). + * However, if arp->mac_src is different than what is stored in + * rx_hashtbl, some other host is now using the IP and we must prevent + * sending out client updates with this IP address and the old MAC + * address. + * Clean up all hash table entries that have this address as ip_src but + * have a different mac_src. + */ + rlb_purge_src_ip(bond, arp); + if (arp->op_code == htons(ARPOP_REPLY)) { /* update rx hash table for this ARP */ rlb_update_entry_from_arp(bond, arp); @@ -432,9 +448,9 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave) _lock_rx_hashtbl_bh(bond); rx_hash_table = bond_info->rx_hashtbl; - index = bond_info->rx_hashtbl_head; + index = bond_info->rx_hashtbl_used_head; for (; index != RLB_NULL_INDEX; index = next_index) { - next_index = rx_hash_table[index].next; + next_index = rx_hash_table[index].used_next; if (rx_hash_table[index].slave == slave) { struct slave *assigned_slave = rlb_next_rx_slave(bond); @@ -519,8 +535,9 @@ static void rlb_update_rx_clients(struct bonding *bond) _lock_rx_hashtbl_bh(bond); - hash_index = bond_info->rx_hashtbl_head; - for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + hash_index = bond_info->rx_hashtbl_used_head; + for (; hash_index != RLB_NULL_INDEX; + hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); if (client_info->ntt) { rlb_update_client(client_info); @@ -548,8 +565,9 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla _lock_rx_hashtbl_bh(bond); - hash_index = bond_info->rx_hashtbl_head; - for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + hash_index = bond_info->rx_hashtbl_used_head; + for (; hash_index != RLB_NULL_INDEX; + hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); if ((client_info->slave == slave) && @@ -578,8 +596,9 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) _lock_rx_hashtbl(bond); - hash_index = bond_info->rx_hashtbl_head; - for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + hash_index = bond_info->rx_hashtbl_used_head; + for (; hash_index != RLB_NULL_INDEX; + hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); if (!client_info->slave) { @@ -625,6 +644,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon /* update mac address from arp */ memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); } + memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN); assigned_slave = client_info->slave; if (assigned_slave) { @@ -647,6 +667,17 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon assigned_slave = rlb_next_rx_slave(bond); if (assigned_slave) { + if (!(client_info->assigned && + client_info->ip_src == arp->ip_src)) { + /* ip_src is going to be updated, + * fix the src hash list + */ + u32 hash_src = _simple_hash((u8 *)&arp->ip_src, + sizeof(arp->ip_src)); + rlb_src_unlink(bond, hash_index); + rlb_src_link(bond, hash_src, hash_index); + } + client_info->ip_src = arp->ip_src; client_info->ip_dst = arp->ip_dst; /* arp->mac_dst is broadcast for arp reqeusts. @@ -654,6 +685,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon * upon receiving an arp reply. */ memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); + memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN); client_info->slave = assigned_slave; if (!ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) { @@ -669,11 +701,11 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon } if (!client_info->assigned) { - u32 prev_tbl_head = bond_info->rx_hashtbl_head; - bond_info->rx_hashtbl_head = hash_index; - client_info->next = prev_tbl_head; + u32 prev_tbl_head = bond_info->rx_hashtbl_used_head; + bond_info->rx_hashtbl_used_head = hash_index; + client_info->used_next = prev_tbl_head; if (prev_tbl_head != RLB_NULL_INDEX) { - bond_info->rx_hashtbl[prev_tbl_head].prev = + bond_info->rx_hashtbl[prev_tbl_head].used_prev = hash_index; } client_info->assigned = 1; @@ -694,6 +726,12 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) struct arp_pkt *arp = arp_pkt(skb); struct slave *tx_slave = NULL; + /* Don't modify or load balance ARPs that do not originate locally + * (e.g.,arrive via a bridge). + */ + if (!bond_slave_has_mac(bond, arp->mac_src)) + return NULL; + if (arp->op_code == htons(ARPOP_REPLY)) { /* the arp must be sent on the selected * rx channel @@ -740,8 +778,9 @@ static void rlb_rebalance(struct bonding *bond) _lock_rx_hashtbl_bh(bond); ntt = 0; - hash_index = bond_info->rx_hashtbl_head; - for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + hash_index = bond_info->rx_hashtbl_used_head; + for (; hash_index != RLB_NULL_INDEX; + hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); assigned_slave = rlb_next_rx_slave(bond); if (assigned_slave && (client_info->slave != assigned_slave)) { @@ -759,11 +798,113 @@ static void rlb_rebalance(struct bonding *bond) } /* Caller must hold rx_hashtbl lock */ +static void rlb_init_table_entry_dst(struct rlb_client_info *entry) +{ + entry->used_next = RLB_NULL_INDEX; + entry->used_prev = RLB_NULL_INDEX; + entry->assigned = 0; + entry->slave = NULL; + entry->tag = 0; +} +static void rlb_init_table_entry_src(struct rlb_client_info *entry) +{ + entry->src_first = RLB_NULL_INDEX; + entry->src_prev = RLB_NULL_INDEX; + entry->src_next = RLB_NULL_INDEX; +} + static void rlb_init_table_entry(struct rlb_client_info *entry) { memset(entry, 0, sizeof(struct rlb_client_info)); - entry->next = RLB_NULL_INDEX; - entry->prev = RLB_NULL_INDEX; + rlb_init_table_entry_dst(entry); + rlb_init_table_entry_src(entry); +} + +static void rlb_delete_table_entry_dst(struct bonding *bond, u32 index) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u32 next_index = bond_info->rx_hashtbl[index].used_next; + u32 prev_index = bond_info->rx_hashtbl[index].used_prev; + + if (index == bond_info->rx_hashtbl_used_head) + bond_info->rx_hashtbl_used_head = next_index; + if (prev_index != RLB_NULL_INDEX) + bond_info->rx_hashtbl[prev_index].used_next = next_index; + if (next_index != RLB_NULL_INDEX) + bond_info->rx_hashtbl[next_index].used_prev = prev_index; +} + +/* unlink a rlb hash table entry from the src list */ +static void rlb_src_unlink(struct bonding *bond, u32 index) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u32 next_index = bond_info->rx_hashtbl[index].src_next; + u32 prev_index = bond_info->rx_hashtbl[index].src_prev; + + bond_info->rx_hashtbl[index].src_next = RLB_NULL_INDEX; + bond_info->rx_hashtbl[index].src_prev = RLB_NULL_INDEX; + + if (next_index != RLB_NULL_INDEX) + bond_info->rx_hashtbl[next_index].src_prev = prev_index; + + if (prev_index == RLB_NULL_INDEX) + return; + + /* is prev_index pointing to the head of this list? */ + if (bond_info->rx_hashtbl[prev_index].src_first == index) + bond_info->rx_hashtbl[prev_index].src_first = next_index; + else + bond_info->rx_hashtbl[prev_index].src_next = next_index; + +} + +static void rlb_delete_table_entry(struct bonding *bond, u32 index) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); + + rlb_delete_table_entry_dst(bond, index); + rlb_init_table_entry_dst(entry); + + rlb_src_unlink(bond, index); +} + +/* add the rx_hashtbl[ip_dst_hash] entry to the list + * of entries with identical ip_src_hash + */ +static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u32 next; + + bond_info->rx_hashtbl[ip_dst_hash].src_prev = ip_src_hash; + next = bond_info->rx_hashtbl[ip_src_hash].src_first; + bond_info->rx_hashtbl[ip_dst_hash].src_next = next; + if (next != RLB_NULL_INDEX) + bond_info->rx_hashtbl[next].src_prev = ip_dst_hash; + bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash; +} + +/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does + * not match arp->mac_src */ +static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u32 ip_src_hash = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src)); + u32 index; + + _lock_rx_hashtbl_bh(bond); + + index = bond_info->rx_hashtbl[ip_src_hash].src_first; + while (index != RLB_NULL_INDEX) { + struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); + u32 next_index = entry->src_next; + if (entry->ip_src == arp->ip_src && + !ether_addr_equal_64bits(arp->mac_src, entry->mac_src)) + rlb_delete_table_entry(bond, index); + index = next_index; + } + _unlock_rx_hashtbl_bh(bond); } static int rlb_initialize(struct bonding *bond) @@ -781,7 +922,7 @@ static int rlb_initialize(struct bonding *bond) bond_info->rx_hashtbl = new_hashtbl; - bond_info->rx_hashtbl_head = RLB_NULL_INDEX; + bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) { rlb_init_table_entry(bond_info->rx_hashtbl + i); @@ -803,7 +944,7 @@ static void rlb_deinitialize(struct bonding *bond) kfree(bond_info->rx_hashtbl); bond_info->rx_hashtbl = NULL; - bond_info->rx_hashtbl_head = RLB_NULL_INDEX; + bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; _unlock_rx_hashtbl_bh(bond); } @@ -815,25 +956,13 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) _lock_rx_hashtbl_bh(bond); - curr_index = bond_info->rx_hashtbl_head; + curr_index = bond_info->rx_hashtbl_used_head; while (curr_index != RLB_NULL_INDEX) { struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]); - u32 next_index = bond_info->rx_hashtbl[curr_index].next; - u32 prev_index = bond_info->rx_hashtbl[curr_index].prev; - - if (curr->tag && (curr->vlan_id == vlan_id)) { - if (curr_index == bond_info->rx_hashtbl_head) { - bond_info->rx_hashtbl_head = next_index; - } - if (prev_index != RLB_NULL_INDEX) { - bond_info->rx_hashtbl[prev_index].next = next_index; - } - if (next_index != RLB_NULL_INDEX) { - bond_info->rx_hashtbl[next_index].prev = prev_index; - } + u32 next_index = bond_info->rx_hashtbl[curr_index].used_next; - rlb_init_table_entry(curr); - } + if (curr->tag && (curr->vlan_id == vlan_id)) + rlb_delete_table_entry(bond, curr_index); curr_index = next_index; } diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h index 90f140a2d197..e7a5b8b37ea3 100644 --- a/drivers/net/bonding/bond_alb.h +++ b/drivers/net/bonding/bond_alb.h @@ -94,15 +94,35 @@ struct tlb_client_info { /* ------------------------------------------------------------------------- * struct rlb_client_info contains all info related to a specific rx client - * connection. This is the Clients Hash Table entry struct + * connection. This is the Clients Hash Table entry struct. + * Note that this is not a proper hash table; if a new client's IP address + * hash collides with an existing client entry, the old entry is replaced. + * + * There is a linked list (linked by the used_next and used_prev members) + * linking all the used entries of the hash table. This allows updating + * all the clients without walking over all the unused elements of the table. + * + * There are also linked lists of entries with identical hash(ip_src). These + * allow cleaning up the table from ip_src<->mac_src associations that have + * become outdated and would cause sending out invalid ARP updates to the + * network. These are linked by the (src_next and src_prev members). * ------------------------------------------------------------------------- */ struct rlb_client_info { __be32 ip_src; /* the server IP address */ __be32 ip_dst; /* the client IP address */ + u8 mac_src[ETH_ALEN]; /* the server MAC address */ u8 mac_dst[ETH_ALEN]; /* the client MAC address */ - u32 next; /* The next Hash table entry index */ - u32 prev; /* The previous Hash table entry index */ + + /* list of used hash table entries, starting at rx_hashtbl_used_head */ + u32 used_next; + u32 used_prev; + + /* ip_src based hashing */ + u32 src_next; /* next entry with same hash(ip_src) */ + u32 src_prev; /* prev entry with same hash(ip_src) */ + u32 src_first; /* first entry with hash(ip_src) == this entry's index */ + u8 assigned; /* checking whether this entry is assigned */ u8 ntt; /* flag - need to transmit client info */ struct slave *slave; /* the slave assigned to this client */ @@ -131,7 +151,7 @@ struct alb_bond_info { int rlb_enabled; struct rlb_client_info *rx_hashtbl; /* Receive hash table */ spinlock_t rx_hashtbl_lock; - u32 rx_hashtbl_head; + u32 rx_hashtbl_used_head; u8 rx_ntt; /* flag - need to transmit * to all rx clients */ diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c index 2cf084eb9d52..5fc4c2351478 100644 --- a/drivers/net/bonding/bond_debugfs.c +++ b/drivers/net/bonding/bond_debugfs.c @@ -31,8 +31,9 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v) spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); - hash_index = bond_info->rx_hashtbl_head; - for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + hash_index = bond_info->rx_hashtbl_used_head; + for (; hash_index != RLB_NULL_INDEX; + hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); seq_printf(m, "%-15pI4 %-15pI4 %-17pM %s\n", &client_info->ip_src, diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a7d47350ea4b..ef2cb2418535 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -615,15 +615,9 @@ static int bond_check_dev_link(struct bonding *bond, return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0; /* Try to get link status using Ethtool first. */ - if (slave_dev->ethtool_ops) { - if (slave_dev->ethtool_ops->get_link) { - u32 link; - - link = slave_dev->ethtool_ops->get_link(slave_dev); - - return link ? BMSR_LSTATUS : 0; - } - } + if (slave_dev->ethtool_ops->get_link) + return slave_dev->ethtool_ops->get_link(slave_dev) ? + BMSR_LSTATUS : 0; /* Ethtool can't be used, fallback to MII ioctls. */ ioctl = slave_ops->ndo_do_ioctl; @@ -1510,8 +1504,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) int link_reporting; int res = 0; - if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL && - slave_ops->ndo_do_ioctl == NULL) { + if (!bond->params.use_carrier && + slave_dev->ethtool_ops->get_link == NULL && + slave_ops->ndo_do_ioctl == NULL) { pr_warning("%s: Warning: no link monitoring support for %s\n", bond_dev->name, slave_dev->name); } @@ -1838,7 +1833,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) * anyway (it holds no special properties of the bond device), * so we can change it without calling change_active_interface() */ - if (!bond->curr_active_slave) + if (!bond->curr_active_slave && new_slave->link == BOND_LINK_UP) bond->curr_active_slave = new_slave; break; diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index f8af2fcd3d16..6dded569b111 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -22,6 +22,7 @@ #include <linux/in6.h> #include <linux/netpoll.h> #include <linux/inetdevice.h> +#include <linux/etherdevice.h> #include "bond_3ad.h" #include "bond_alb.h" @@ -450,6 +451,18 @@ static inline void bond_destroy_proc_dir(struct bond_net *bn) } #endif +static inline struct slave *bond_slave_has_mac(struct bonding *bond, + const u8 *mac) +{ + int i = 0; + struct slave *tmp; + + bond_for_each_slave(bond, tmp, i) + if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) + return tmp; + + return NULL; +} /* exported from bond_main.c */ extern int bond_net_id; |