diff options
Diffstat (limited to 'drivers/net/bonding')
-rw-r--r-- | drivers/net/bonding/bond_3ad.c | 230 | ||||
-rw-r--r-- | drivers/net/bonding/bond_3ad.h | 1 | ||||
-rw-r--r-- | drivers/net/bonding/bond_alb.c | 305 | ||||
-rw-r--r-- | drivers/net/bonding/bond_alb.h | 10 | ||||
-rw-r--r-- | drivers/net/bonding/bond_debugfs.c | 8 | ||||
-rw-r--r-- | drivers/net/bonding/bond_main.c | 629 | ||||
-rw-r--r-- | drivers/net/bonding/bond_netlink.c | 41 | ||||
-rw-r--r-- | drivers/net/bonding/bond_options.c | 39 | ||||
-rw-r--r-- | drivers/net/bonding/bond_procfs.c | 27 | ||||
-rw-r--r-- | drivers/net/bonding/bond_sysfs.c | 11 | ||||
-rw-r--r-- | drivers/net/bonding/bonding.h | 45 |
11 files changed, 579 insertions, 767 deletions
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index ee2c73a9de39..2110215f3528 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -102,17 +102,20 @@ static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR; /* ================= main 802.3ad protocol functions ================== */ static int ad_lacpdu_send(struct port *port); static int ad_marker_send(struct port *port, struct bond_marker *marker); -static void ad_mux_machine(struct port *port); +static void ad_mux_machine(struct port *port, bool *update_slave_arr); static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); static void ad_tx_machine(struct port *port); static void ad_periodic_machine(struct port *port); -static void ad_port_selection_logic(struct port *port); -static void ad_agg_selection_logic(struct aggregator *aggregator); +static void ad_port_selection_logic(struct port *port, bool *update_slave_arr); +static void ad_agg_selection_logic(struct aggregator *aggregator, + bool *update_slave_arr); static void ad_clear_agg(struct aggregator *aggregator); static void ad_initialize_agg(struct aggregator *aggregator); static void ad_initialize_port(struct port *port, int lacp_fast); -static void ad_enable_collecting_distributing(struct port *port); -static void ad_disable_collecting_distributing(struct port *port); +static void ad_enable_collecting_distributing(struct port *port, + bool *update_slave_arr); +static void ad_disable_collecting_distributing(struct port *port, + bool *update_slave_arr); static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); static void ad_marker_response_received(struct bond_marker *marker, @@ -234,24 +237,6 @@ static inline int __check_agg_selection_timer(struct port *port) } /** - * __get_state_machine_lock - lock the port's state machines - * @port: the port we're looking at - */ -static inline void __get_state_machine_lock(struct port *port) -{ - spin_lock_bh(&(SLAVE_AD_INFO(port->slave)->state_machine_lock)); -} - -/** - * __release_state_machine_lock - unlock the port's state machines - * @port: the port we're looking at - */ -static inline void __release_state_machine_lock(struct port *port) -{ - spin_unlock_bh(&(SLAVE_AD_INFO(port->slave)->state_machine_lock)); -} - -/** * __get_link_speed - get a port's speed * @port: the port we're looking at * @@ -315,15 +300,14 @@ static u16 __get_link_speed(struct port *port) static u8 __get_duplex(struct port *port) { struct slave *slave = port->slave; - u8 retval; /* handling a special case: when the configuration starts with * link down, it sets the duplex to 0. */ - if (slave->link != BOND_LINK_UP) + if (slave->link != BOND_LINK_UP) { retval = 0x0; - else { + } else { switch (slave->duplex) { case DUPLEX_FULL: retval = 0x1; @@ -341,16 +325,6 @@ static u8 __get_duplex(struct port *port) return retval; } -/** - * __initialize_port_locks - initialize a port's STATE machine spinlock - * @port: the slave of the port we're looking at - */ -static inline void __initialize_port_locks(struct slave *slave) -{ - /* make sure it isn't called twice */ - spin_lock_init(&(SLAVE_AD_INFO(slave)->state_machine_lock)); -} - /* Conversions */ /** @@ -825,8 +799,9 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker) /** * ad_mux_machine - handle a port's mux state machine * @port: the port we're looking at + * @update_slave_arr: Does slave array need update? */ -static void ad_mux_machine(struct port *port) +static void ad_mux_machine(struct port *port, bool *update_slave_arr) { mux_states_t last_state; @@ -930,7 +905,8 @@ static void ad_mux_machine(struct port *port) switch (port->sm_mux_state) { case AD_MUX_DETACHED: port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; - ad_disable_collecting_distributing(port); + ad_disable_collecting_distributing(port, + update_slave_arr); port->actor_oper_port_state &= ~AD_STATE_COLLECTING; port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; port->ntt = true; @@ -942,13 +918,15 @@ static void ad_mux_machine(struct port *port) port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION; port->actor_oper_port_state &= ~AD_STATE_COLLECTING; port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; - ad_disable_collecting_distributing(port); + ad_disable_collecting_distributing(port, + update_slave_arr); port->ntt = true; break; case AD_MUX_COLLECTING_DISTRIBUTING: port->actor_oper_port_state |= AD_STATE_COLLECTING; port->actor_oper_port_state |= AD_STATE_DISTRIBUTING; - ad_enable_collecting_distributing(port); + ad_enable_collecting_distributing(port, + update_slave_arr); port->ntt = true; break; default: @@ -1216,12 +1194,13 @@ static void ad_periodic_machine(struct port *port) /** * ad_port_selection_logic - select aggregation groups * @port: the port we're looking at + * @update_slave_arr: Does slave array need update? * * Select aggregation groups, and assign each port for it's aggregetor. The * selection logic is called in the inititalization (after all the handshkes), * and after every lacpdu receive (if selected is off). */ -static void ad_port_selection_logic(struct port *port) +static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) { struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator; struct port *last_port = NULL, *curr_port; @@ -1376,7 +1355,7 @@ static void ad_port_selection_logic(struct port *port) __agg_ports_are_ready(port->aggregator)); aggregator = __get_first_agg(port); - ad_agg_selection_logic(aggregator); + ad_agg_selection_logic(aggregator, update_slave_arr); } /* Decide if "agg" is a better choice for the new active aggregator that @@ -1464,6 +1443,7 @@ static int agg_device_up(const struct aggregator *agg) /** * ad_agg_selection_logic - select an aggregation group for a team * @aggregator: the aggregator we're looking at + * @update_slave_arr: Does slave array need update? * * It is assumed that only one aggregator may be selected for a team. * @@ -1486,7 +1466,8 @@ static int agg_device_up(const struct aggregator *agg) * __get_active_agg() won't work correctly. This function should be better * called with the bond itself, and retrieve the first agg from it. */ -static void ad_agg_selection_logic(struct aggregator *agg) +static void ad_agg_selection_logic(struct aggregator *agg, + bool *update_slave_arr) { struct aggregator *best, *active, *origin; struct bonding *bond = agg->slave->bond; @@ -1579,6 +1560,8 @@ static void ad_agg_selection_logic(struct aggregator *agg) __disable_port(port); } } + /* Slave array needs update. */ + *update_slave_arr = true; } /* if the selected aggregator is of join individuals @@ -1707,24 +1690,30 @@ static void ad_initialize_port(struct port *port, int lacp_fast) /** * ad_enable_collecting_distributing - enable a port's transmit/receive * @port: the port we're looking at + * @update_slave_arr: Does slave array need update? * * Enable @port if it's in an active aggregator */ -static void ad_enable_collecting_distributing(struct port *port) +static void ad_enable_collecting_distributing(struct port *port, + bool *update_slave_arr) { if (port->aggregator->is_active) { pr_debug("Enabling port %d(LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __enable_port(port); + /* Slave array needs update */ + *update_slave_arr = true; } } /** * ad_disable_collecting_distributing - disable a port's transmit/receive * @port: the port we're looking at + * @update_slave_arr: Does slave array need update? */ -static void ad_disable_collecting_distributing(struct port *port) +static void ad_disable_collecting_distributing(struct port *port, + bool *update_slave_arr) { if (port->aggregator && !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system), @@ -1733,6 +1722,8 @@ static void ad_disable_collecting_distributing(struct port *port) port->actor_port_number, port->aggregator->aggregator_identifier); __disable_port(port); + /* Slave array needs an update */ + *update_slave_arr = true; } } @@ -1843,7 +1834,6 @@ void bond_3ad_bind_slave(struct slave *slave) ad_initialize_port(port, bond->params.lacp_fast); - __initialize_port_locks(slave); port->slave = slave; port->actor_port_number = SLAVE_AD_INFO(slave)->id; /* key is determined according to the link speed, duplex and user key(which @@ -1898,7 +1888,10 @@ void bond_3ad_unbind_slave(struct slave *slave) struct bonding *bond = slave->bond; struct slave *slave_iter; struct list_head *iter; + bool dummy_slave_update; /* Ignore this value as caller updates array */ + /* Sync against bond_3ad_state_machine_handler() */ + spin_lock_bh(&bond->mode_lock); aggregator = &(SLAVE_AD_INFO(slave)->aggregator); port = &(SLAVE_AD_INFO(slave)->port); @@ -1906,7 +1899,7 @@ void bond_3ad_unbind_slave(struct slave *slave) if (!port->slave) { netdev_warn(bond->dev, "Trying to unbind an uninitialized port on %s\n", slave->dev->name); - return; + goto out; } netdev_dbg(bond->dev, "Unbinding Link Aggregation Group %d\n", @@ -1979,7 +1972,8 @@ void bond_3ad_unbind_slave(struct slave *slave) ad_clear_agg(aggregator); if (select_new_active_agg) - ad_agg_selection_logic(__get_first_agg(port)); + ad_agg_selection_logic(__get_first_agg(port), + &dummy_slave_update); } else { netdev_warn(bond->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n"); } @@ -1994,7 +1988,8 @@ void bond_3ad_unbind_slave(struct slave *slave) /* select new active aggregator */ temp_aggregator = __get_first_agg(port); if (temp_aggregator) - ad_agg_selection_logic(temp_aggregator); + ad_agg_selection_logic(temp_aggregator, + &dummy_slave_update); } } } @@ -2024,7 +2019,8 @@ void bond_3ad_unbind_slave(struct slave *slave) if (select_new_active_agg) { netdev_info(bond->dev, "Removing an active aggregator\n"); /* select new active aggregator */ - ad_agg_selection_logic(__get_first_agg(port)); + ad_agg_selection_logic(__get_first_agg(port), + &dummy_slave_update); } } break; @@ -2032,6 +2028,9 @@ void bond_3ad_unbind_slave(struct slave *slave) } } port->slave = NULL; + +out: + spin_unlock_bh(&bond->mode_lock); } /** @@ -2056,8 +2055,13 @@ void bond_3ad_state_machine_handler(struct work_struct *work) struct slave *slave; struct port *port; bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; + bool update_slave_arr = false; - read_lock(&bond->lock); + /* Lock to protect data accessed by all (e.g., port->sm_vars) and + * against running with bond_3ad_unbind_slave. ad_rx_machine may run + * concurrently due to incoming LACPDU as well. + */ + spin_lock_bh(&bond->mode_lock); rcu_read_lock(); /* check if there are any slaves */ @@ -2079,7 +2083,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) } aggregator = __get_first_agg(port); - ad_agg_selection_logic(aggregator); + ad_agg_selection_logic(aggregator, &update_slave_arr); } bond_3ad_set_carrier(bond); } @@ -2093,23 +2097,15 @@ void bond_3ad_state_machine_handler(struct work_struct *work) goto re_arm; } - /* Lock around state machines to protect data accessed - * by all (e.g., port->sm_vars). ad_rx_machine may run - * concurrently due to incoming LACPDU. - */ - __get_state_machine_lock(port); - ad_rx_machine(NULL, port); ad_periodic_machine(port); - ad_port_selection_logic(port); - ad_mux_machine(port); + ad_port_selection_logic(port, &update_slave_arr); + ad_mux_machine(port, &update_slave_arr); ad_tx_machine(port); /* turn off the BEGIN bit, since we already handled it */ if (port->sm_vars & AD_PORT_BEGIN) port->sm_vars &= ~AD_PORT_BEGIN; - - __release_state_machine_lock(port); } re_arm: @@ -2120,7 +2116,10 @@ re_arm: } } rcu_read_unlock(); - read_unlock(&bond->lock); + spin_unlock_bh(&bond->mode_lock); + + if (update_slave_arr) + bond_slave_arr_work_rearm(bond, 0); if (should_notify_rtnl && rtnl_trylock()) { bond_slave_state_notify(bond); @@ -2161,9 +2160,9 @@ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, netdev_dbg(slave->bond->dev, "Received LACPDU on port %d\n", port->actor_port_number); /* Protect against concurrent state machines */ - __get_state_machine_lock(port); + spin_lock(&slave->bond->mode_lock); ad_rx_machine(lacpdu, port); - __release_state_machine_lock(port); + spin_unlock(&slave->bond->mode_lock); break; case AD_TYPE_MARKER: @@ -2213,7 +2212,7 @@ void bond_3ad_adapter_speed_changed(struct slave *slave) return; } - __get_state_machine_lock(port); + spin_lock_bh(&slave->bond->mode_lock); port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS; port->actor_oper_port_key = port->actor_admin_port_key |= @@ -2224,7 +2223,7 @@ void bond_3ad_adapter_speed_changed(struct slave *slave) */ port->sm_vars |= AD_PORT_BEGIN; - __release_state_machine_lock(port); + spin_unlock_bh(&slave->bond->mode_lock); } /** @@ -2246,7 +2245,7 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave) return; } - __get_state_machine_lock(port); + spin_lock_bh(&slave->bond->mode_lock); port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; port->actor_oper_port_key = port->actor_admin_port_key |= @@ -2257,7 +2256,7 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave) */ port->sm_vars |= AD_PORT_BEGIN; - __release_state_machine_lock(port); + spin_unlock_bh(&slave->bond->mode_lock); } /** @@ -2280,7 +2279,7 @@ void bond_3ad_handle_link_change(struct slave *slave, char link) return; } - __get_state_machine_lock(port); + spin_lock_bh(&slave->bond->mode_lock); /* on link down we are zeroing duplex and speed since * some of the adaptors(ce1000.lan) report full duplex/speed * instead of N/A(duplex) / 0(speed). @@ -2311,7 +2310,12 @@ void bond_3ad_handle_link_change(struct slave *slave, char link) */ port->sm_vars |= AD_PORT_BEGIN; - __release_state_machine_lock(port); + spin_unlock_bh(&slave->bond->mode_lock); + + /* RTNL is held and mode_lock is released so it's safe + * to update slave_array here. + */ + bond_update_slave_arr(slave->bond, NULL); } /** @@ -2395,7 +2399,6 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond, return 0; } -/* Wrapper used to hold bond->lock so no slave manipulation can occur */ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { int ret; @@ -2407,90 +2410,19 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) return ret; } -int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) -{ - struct bonding *bond = netdev_priv(dev); - struct slave *slave, *first_ok_slave; - struct aggregator *agg; - struct ad_info ad_info; - struct list_head *iter; - int slaves_in_agg; - int slave_agg_no; - int agg_id; - - if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { - netdev_dbg(dev, "__bond_3ad_get_active_agg_info failed\n"); - goto err_free; - } - - slaves_in_agg = ad_info.ports; - agg_id = ad_info.aggregator_id; - - if (slaves_in_agg == 0) { - netdev_dbg(dev, "active aggregator is empty\n"); - goto err_free; - } - - slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg; - first_ok_slave = NULL; - - bond_for_each_slave_rcu(bond, slave, iter) { - agg = SLAVE_AD_INFO(slave)->port.aggregator; - if (!agg || agg->aggregator_identifier != agg_id) - continue; - - if (slave_agg_no >= 0) { - if (!first_ok_slave && bond_slave_can_tx(slave)) - first_ok_slave = slave; - slave_agg_no--; - continue; - } - - if (bond_slave_can_tx(slave)) { - bond_dev_queue_xmit(bond, skb, slave->dev); - goto out; - } - } - - if (slave_agg_no >= 0) { - netdev_err(dev, "Couldn't find a slave to tx on for aggregator ID %d\n", - agg_id); - goto err_free; - } - - /* we couldn't find any suitable slave after the agg_no, so use the - * first suitable found, if found. - */ - if (first_ok_slave) - bond_dev_queue_xmit(bond, skb, first_ok_slave->dev); - else - goto err_free; - -out: - return NETDEV_TX_OK; -err_free: - /* no suitable interface, frame not sent */ - dev_kfree_skb_any(skb); - goto out; -} - int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { - int ret = RX_HANDLER_ANOTHER; struct lacpdu *lacpdu, _lacpdu; if (skb->protocol != PKT_TYPE_LACPDU) - return ret; + return RX_HANDLER_ANOTHER; lacpdu = skb_header_pointer(skb, 0, sizeof(_lacpdu), &_lacpdu); if (!lacpdu) - return ret; + return RX_HANDLER_ANOTHER; - read_lock(&bond->lock); - ret = bond_3ad_rx_indication(lacpdu, slave, skb->len); - read_unlock(&bond->lock); - return ret; + return bond_3ad_rx_indication(lacpdu, slave, skb->len); } /** @@ -2500,7 +2432,7 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, * When modify lacp_rate parameter via sysfs, * update actor_oper_port_state of each port. * - * Hold slave->state_machine_lock, + * Hold bond->mode_lock, * so we can modify port->actor_oper_port_state, * no matter bond is up or down. */ @@ -2512,13 +2444,13 @@ void bond_3ad_update_lacp_rate(struct bonding *bond) int lacp_fast; lacp_fast = bond->params.lacp_fast; + spin_lock_bh(&bond->mode_lock); bond_for_each_slave(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); - __get_state_machine_lock(port); if (lacp_fast) port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT; else port->actor_oper_port_state &= ~AD_STATE_LACP_TIMEOUT; - __release_state_machine_lock(port); } + spin_unlock_bh(&bond->mode_lock); } diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h index bb03b1df2f3e..c5f14ac63f3e 100644 --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h @@ -259,7 +259,6 @@ struct ad_bond_info { struct ad_slave_info { struct aggregator aggregator; /* 802.3ad aggregator structure */ struct port port; /* 802.3ad port structure */ - spinlock_t state_machine_lock; /* mutex state machines vs. incoming LACPDU */ u16 id; }; diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 95dd1f58c260..d2eadab787c5 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -100,27 +100,6 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size) /*********************** tlb specific functions ***************************/ -static inline void _lock_tx_hashtbl_bh(struct bonding *bond) -{ - spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); -} - -static inline void _unlock_tx_hashtbl_bh(struct bonding *bond) -{ - spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); -} - -static inline void _lock_tx_hashtbl(struct bonding *bond) -{ - spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); -} - -static inline void _unlock_tx_hashtbl(struct bonding *bond) -{ - spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); -} - -/* Caller must hold tx_hashtbl lock */ static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load) { if (save_load) { @@ -140,7 +119,6 @@ static inline void tlb_init_slave(struct slave *slave) SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX; } -/* Caller must hold bond lock for read, BH disabled */ static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load) { @@ -163,13 +141,12 @@ static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, tlb_init_slave(slave); } -/* Caller must hold bond lock for read */ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load) { - _lock_tx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); __tlb_clear_slave(bond, slave, save_load); - _unlock_tx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } /* Must be called before starting the monitor timer */ @@ -184,14 +161,14 @@ static int tlb_initialize(struct bonding *bond) if (!new_hashtbl) return -1; - _lock_tx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); bond_info->tx_hashtbl = new_hashtbl; for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0); - _unlock_tx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); return 0; } @@ -200,18 +177,13 @@ static int tlb_initialize(struct bonding *bond) static void tlb_deinitialize(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct tlb_up_slave *arr; - _lock_tx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); kfree(bond_info->tx_hashtbl); bond_info->tx_hashtbl = NULL; - _unlock_tx_hashtbl_bh(bond); - - arr = rtnl_dereference(bond_info->slave_arr); - if (arr) - kfree_rcu(arr, rcu); + spin_unlock_bh(&bond->mode_lock); } static long long compute_gap(struct slave *slave) @@ -220,7 +192,6 @@ static long long compute_gap(struct slave *slave) (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */ } -/* Caller must hold bond lock for read */ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) { struct slave *slave, *least_loaded; @@ -281,42 +252,23 @@ static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index, return assigned_slave; } -/* Caller must hold bond lock for read */ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len) { struct slave *tx_slave; - /* - * We don't need to disable softirq here, becase + + /* We don't need to disable softirq here, becase * tlb_choose_channel() is only called by bond_alb_xmit() * which already has softirq disabled. */ - _lock_tx_hashtbl(bond); + spin_lock(&bond->mode_lock); tx_slave = __tlb_choose_channel(bond, hash_index, skb_len); - _unlock_tx_hashtbl(bond); + spin_unlock(&bond->mode_lock); + return tx_slave; } /*********************** rlb specific functions ***************************/ -static inline void _lock_rx_hashtbl_bh(struct bonding *bond) -{ - spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); -} - -static inline void _unlock_rx_hashtbl_bh(struct bonding *bond) -{ - spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); -} - -static inline void _lock_rx_hashtbl(struct bonding *bond) -{ - spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); -} - -static inline void _unlock_rx_hashtbl(struct bonding *bond) -{ - spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); -} /* when an ARP REPLY is received from a client update its info * in the rx_hashtbl @@ -327,7 +279,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) struct rlb_client_info *client_info; u32 hash_index; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); client_info = &(bond_info->rx_hashtbl[hash_index]); @@ -342,7 +294,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) bond_info->rx_ntt = 1; } - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, @@ -378,15 +330,15 @@ out: return RX_HANDLER_ANOTHER; } -/* Caller must hold bond lock for read */ -static struct slave *rlb_next_rx_slave(struct bonding *bond) +/* Caller must hold rcu_read_lock() */ +static struct slave *__rlb_next_rx_slave(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct slave *before = NULL, *rx_slave = NULL, *slave; struct list_head *iter; bool found = false; - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { if (!bond_slave_can_tx(slave)) continue; if (!found) { @@ -411,35 +363,16 @@ static struct slave *rlb_next_rx_slave(struct bonding *bond) return rx_slave; } -/* Caller must hold rcu_read_lock() for read */ -static struct slave *__rlb_next_rx_slave(struct bonding *bond) +/* Caller must hold RTNL, rcu_read_lock is obtained only to silence checkers */ +static struct slave *rlb_next_rx_slave(struct bonding *bond) { - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct slave *before = NULL, *rx_slave = NULL, *slave; - struct list_head *iter; - bool found = false; + struct slave *rx_slave; - bond_for_each_slave_rcu(bond, slave, iter) { - if (!bond_slave_can_tx(slave)) - continue; - if (!found) { - if (!before || before->speed < slave->speed) - before = slave; - } else { - if (!rx_slave || rx_slave->speed < slave->speed) - rx_slave = slave; - } - if (slave == bond_info->rx_slave) - found = true; - } - /* we didn't find anything after the current or we have something - * better before and up to the current slave - */ - if (!rx_slave || (before && rx_slave->speed < before->speed)) - rx_slave = before; + ASSERT_RTNL(); - if (rx_slave) - bond_info->rx_slave = rx_slave; + rcu_read_lock(); + rx_slave = __rlb_next_rx_slave(bond); + rcu_read_unlock(); return rx_slave; } @@ -447,11 +380,11 @@ static struct slave *__rlb_next_rx_slave(struct bonding *bond) /* teach the switch the mac of a disabled slave * on the primary for fault tolerance * - * Caller must hold bond->curr_slave_lock for write or bond lock for write + * Caller must hold RTNL */ static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) { - struct slave *curr_active = bond_deref_active_protected(bond); + struct slave *curr_active = rtnl_dereference(bond->curr_active_slave); if (!curr_active) return; @@ -479,7 +412,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave) u32 index, next_index; /* clear slave from rx_hashtbl */ - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); rx_hash_table = bond_info->rx_hashtbl; index = bond_info->rx_hashtbl_used_head; @@ -510,14 +443,10 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave) } } - _unlock_rx_hashtbl_bh(bond); - - write_lock_bh(&bond->curr_slave_lock); + spin_unlock_bh(&bond->mode_lock); - if (slave != bond_deref_active_protected(bond)) + if (slave != rtnl_dereference(bond->curr_active_slave)) rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); - - write_unlock_bh(&bond->curr_slave_lock); } static void rlb_update_client(struct rlb_client_info *client_info) @@ -565,7 +494,7 @@ static void rlb_update_rx_clients(struct bonding *bond) struct rlb_client_info *client_info; u32 hash_index; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); hash_index = bond_info->rx_hashtbl_used_head; for (; hash_index != RLB_NULL_INDEX; @@ -583,7 +512,7 @@ static void rlb_update_rx_clients(struct bonding *bond) */ bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } /* The slave was assigned a new mac address - update the clients */ @@ -594,7 +523,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla int ntt = 0; u32 hash_index; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); hash_index = bond_info->rx_hashtbl_used_head; for (; hash_index != RLB_NULL_INDEX; @@ -615,7 +544,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; } - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } /* mark all clients using src_ip to be updated */ @@ -625,7 +554,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) struct rlb_client_info *client_info; u32 hash_index; - _lock_rx_hashtbl(bond); + spin_lock(&bond->mode_lock); hash_index = bond_info->rx_hashtbl_used_head; for (; hash_index != RLB_NULL_INDEX; @@ -636,7 +565,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) netdev_err(bond->dev, "found a client with no channel in the client's hash table\n"); continue; } - /*update all clients using this src_ip, that are not assigned + /* update all clients using this src_ip, that are not assigned * to the team's address (curr_active_slave) and have a known * unicast mac address. */ @@ -649,10 +578,9 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) } } - _unlock_rx_hashtbl(bond); + spin_unlock(&bond->mode_lock); } -/* Caller must hold both bond and ptr locks for read */ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); @@ -661,7 +589,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon struct rlb_client_info *client_info; u32 hash_index = 0; - _lock_rx_hashtbl(bond); + spin_lock(&bond->mode_lock); curr_active_slave = rcu_dereference(bond->curr_active_slave); @@ -680,7 +608,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon assigned_slave = client_info->slave; if (assigned_slave) { - _unlock_rx_hashtbl(bond); + spin_unlock(&bond->mode_lock); return assigned_slave; } } else { @@ -742,7 +670,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon } } - _unlock_rx_hashtbl(bond); + spin_unlock(&bond->mode_lock); return assigned_slave; } @@ -763,9 +691,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) return NULL; if (arp->op_code == htons(ARPOP_REPLY)) { - /* the arp must be sent on the selected - * rx channel - */ + /* the arp must be sent on the selected rx channel */ tx_slave = rlb_choose_channel(skb, bond); if (tx_slave) ether_addr_copy(arp->mac_src, tx_slave->dev->dev_addr); @@ -795,7 +721,6 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) return tx_slave; } -/* Caller must hold bond lock for read */ static void rlb_rebalance(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); @@ -804,7 +729,7 @@ static void rlb_rebalance(struct bonding *bond) int ntt; u32 hash_index; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); ntt = 0; hash_index = bond_info->rx_hashtbl_used_head; @@ -822,10 +747,10 @@ static void rlb_rebalance(struct bonding *bond) /* update the team's flag only after the whole iteration */ if (ntt) bond_info->rx_ntt = 1; - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } -/* Caller must hold rx_hashtbl lock */ +/* Caller must hold mode_lock */ static void rlb_init_table_entry_dst(struct rlb_client_info *entry) { entry->used_next = RLB_NULL_INDEX; @@ -913,15 +838,16 @@ static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash) bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash; } -/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does - * not match arp->mac_src */ +/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does + * not match arp->mac_src + */ static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); u32 index; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); index = bond_info->rx_hashtbl[ip_src_hash].src_first; while (index != RLB_NULL_INDEX) { @@ -932,7 +858,7 @@ static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp) rlb_delete_table_entry(bond, index); index = next_index; } - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } static int rlb_initialize(struct bonding *bond) @@ -946,7 +872,7 @@ static int rlb_initialize(struct bonding *bond) if (!new_hashtbl) return -1; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); bond_info->rx_hashtbl = new_hashtbl; @@ -955,7 +881,7 @@ static int rlb_initialize(struct bonding *bond) for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) rlb_init_table_entry(bond_info->rx_hashtbl + i); - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); /* register to receive ARPs */ bond->recv_probe = rlb_arp_recv; @@ -967,13 +893,13 @@ static void rlb_deinitialize(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); kfree(bond_info->rx_hashtbl); bond_info->rx_hashtbl = NULL; bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) @@ -981,7 +907,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); u32 curr_index; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); curr_index = bond_info->rx_hashtbl_used_head; while (curr_index != RLB_NULL_INDEX) { @@ -994,7 +920,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) curr_index = next_index; } - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } /*********************** tlb/rlb shared functions *********************/ @@ -1091,8 +1017,9 @@ static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[]) return 0; } - /* for rlb each slave must have a unique hw mac addresses so that */ - /* each slave will receive packets destined to a different mac */ + /* for rlb each slave must have a unique hw mac addresses so that + * each slave will receive packets destined to a different mac + */ memcpy(s_addr.sa_data, addr, dev->addr_len); s_addr.sa_family = dev->type; if (dev_set_mac_address(dev, &s_addr)) { @@ -1103,13 +1030,10 @@ static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[]) return 0; } -/* - * Swap MAC addresses between two slaves. +/* Swap MAC addresses between two slaves. * * Called with RTNL held, and no other locks. - * */ - static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2) { u8 tmp_mac_addr[ETH_ALEN]; @@ -1120,8 +1044,7 @@ static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2) } -/* - * Send learning packets after MAC address swap. +/* Send learning packets after MAC address swap. * * Called with RTNL and no other locks */ @@ -1194,7 +1117,6 @@ static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *sla found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr); if (found_slave) { - /* locking: needs RTNL and nothing else */ alb_swap_mac_addr(slave, found_slave); alb_fasten_mac_swap(bond, slave, found_slave); } @@ -1243,7 +1165,8 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav return 0; /* Try setting slave mac to bond address and fall-through - to code handling that situation below... */ + * to code handling that situation below... + */ alb_set_slave_mac_addr(slave, bond->dev->dev_addr); } @@ -1351,7 +1274,6 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled) if (rlb_enabled) { bond->alb_info.rlb_enabled = 1; - /* initialize rlb */ res = rlb_initialize(bond); if (res) { tlb_deinitialize(bond); @@ -1375,7 +1297,7 @@ void bond_alb_deinitialize(struct bonding *bond) } static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, - struct slave *tx_slave) + struct slave *tx_slave) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct ethhdr *eth_data = eth_hdr(skb); @@ -1388,7 +1310,7 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, } if (tx_slave && bond_slave_can_tx(tx_slave)) { - if (tx_slave != rcu_dereference(bond->curr_active_slave)) { + if (tx_slave != rcu_access_pointer(bond->curr_active_slave)) { ether_addr_copy(eth_data->h_source, tx_slave->dev->dev_addr); } @@ -1398,9 +1320,9 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, } if (tx_slave && bond->params.tlb_dynamic_lb) { - _lock_tx_hashtbl(bond); + spin_lock(&bond->mode_lock); __tlb_clear_slave(bond, tx_slave, 0); - _unlock_tx_hashtbl(bond); + spin_unlock(&bond->mode_lock); } /* no suitable interface, frame not sent */ @@ -1409,39 +1331,9 @@ out: return NETDEV_TX_OK; } -static int bond_tlb_update_slave_arr(struct bonding *bond, - struct slave *skipslave) -{ - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct slave *tx_slave; - struct list_head *iter; - struct tlb_up_slave *new_arr, *old_arr; - - new_arr = kzalloc(offsetof(struct tlb_up_slave, arr[bond->slave_cnt]), - GFP_ATOMIC); - if (!new_arr) - return -ENOMEM; - - bond_for_each_slave(bond, tx_slave, iter) { - if (!bond_slave_can_tx(tx_slave)) - continue; - if (skipslave == tx_slave) - continue; - new_arr->arr[new_arr->count++] = tx_slave; - } - - old_arr = rtnl_dereference(bond_info->slave_arr); - rcu_assign_pointer(bond_info->slave_arr, new_arr); - if (old_arr) - kfree_rcu(old_arr, rcu); - - return 0; -} - int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct ethhdr *eth_data; struct slave *tx_slave = NULL; u32 hash_index; @@ -1462,12 +1354,14 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) hash_index & 0xFF, skb->len); } else { - struct tlb_up_slave *slaves; + struct bond_up_slave *slaves; + unsigned int count; - slaves = rcu_dereference(bond_info->slave_arr); - if (slaves && slaves->count) + slaves = rcu_dereference(bond->slave_arr); + count = slaves ? ACCESS_ONCE(slaves->count) : 0; + if (likely(count)) tx_slave = slaves->arr[hash_index % - slaves->count]; + count]; } break; } @@ -1595,13 +1489,6 @@ void bond_alb_monitor(struct work_struct *work) if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) { bool strict_match; - /* change of curr_active_slave involves swapping of mac addresses. - * in order to avoid this swapping from happening while - * sending the learning packets, the curr_slave_lock must be held for - * read. - */ - read_lock(&bond->curr_slave_lock); - bond_for_each_slave_rcu(bond, slave, iter) { /* If updating current_active, use all currently * user mac addreses (!strict_match). Otherwise, only @@ -1613,17 +1500,11 @@ void bond_alb_monitor(struct work_struct *work) alb_send_learning_packets(slave, slave->dev->dev_addr, strict_match); } - - read_unlock(&bond->curr_slave_lock); - bond_info->lp_counter = 0; } /* rebalance tx traffic */ if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { - - read_lock(&bond->curr_slave_lock); - bond_for_each_slave_rcu(bond, slave, iter) { tlb_clear_slave(bond, slave, 1); if (slave == rcu_access_pointer(bond->curr_active_slave)) { @@ -1633,19 +1514,14 @@ void bond_alb_monitor(struct work_struct *work) bond_info->unbalanced_load = 0; } } - - read_unlock(&bond->curr_slave_lock); - bond_info->tx_rebalance_counter = 0; } - /* handle rlb stuff */ if (bond_info->rlb_enabled) { if (bond_info->primary_is_promisc && (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) { - /* - * dev_set_promiscuity requires rtnl and + /* dev_set_promiscuity requires rtnl and * nothing else. Avoid race with bond_close. */ rcu_read_unlock(); @@ -1715,8 +1591,7 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave) return 0; } -/* - * Remove slave from tlb and rlb hash tables, and fix up MAC addresses +/* Remove slave from tlb and rlb hash tables, and fix up MAC addresses * if necessary. * * Caller must hold RTNL and no other locks @@ -1733,13 +1608,8 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave) rlb_clear_slave(bond, slave); } - if (bond_is_nondyn_tlb(bond)) - if (bond_tlb_update_slave_arr(bond, slave)) - pr_err("Failed to build slave-array for TLB mode.\n"); - } -/* Caller must hold bond lock for read */ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); @@ -1762,7 +1632,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char } if (bond_is_nondyn_tlb(bond)) { - if (bond_tlb_update_slave_arr(bond, NULL)) + if (bond_update_slave_arr(bond, NULL)) pr_err("Failed to build slave-array for TLB mode.\n"); } } @@ -1775,22 +1645,14 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char * Set the bond->curr_active_slave to @new_slave and handle * mac address swapping and promiscuity changes as needed. * - * If new_slave is NULL, caller must hold curr_slave_lock or - * bond->lock for write. - * - * If new_slave is not NULL, caller must hold RTNL, curr_slave_lock - * for write. Processing here may sleep, so no other locks may be held. + * Caller must hold RTNL */ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave) - __releases(&bond->curr_slave_lock) - __acquires(&bond->curr_slave_lock) { struct slave *swap_slave; struct slave *curr_active; - curr_active = rcu_dereference_protected(bond->curr_active_slave, - !new_slave || - lockdep_is_held(&bond->curr_slave_lock)); + curr_active = rtnl_dereference(bond->curr_active_slave); if (curr_active == new_slave) return; @@ -1812,8 +1674,7 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave if (!swap_slave) swap_slave = bond_slave_has_mac(bond, bond->dev->dev_addr); - /* - * Arrange for swap_slave and new_slave to temporarily be + /* Arrange for swap_slave and new_slave to temporarily be * ignored so we can mess with their MAC addresses without * fear of interference from transmit activity. */ @@ -1821,10 +1682,6 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave tlb_clear_slave(bond, swap_slave, 1); tlb_clear_slave(bond, new_slave, 1); - write_unlock_bh(&bond->curr_slave_lock); - - ASSERT_RTNL(); - /* in TLB mode, the slave might flip down/up with the old dev_addr, * and thus filter bond->dev_addr's packets, so force bond's mac */ @@ -1853,16 +1710,10 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave alb_send_learning_packets(new_slave, bond->dev->dev_addr, false); } - - write_lock_bh(&bond->curr_slave_lock); } -/* - * Called with RTNL - */ +/* Called with RTNL */ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) - __acquires(&bond->lock) - __releases(&bond->lock) { struct bonding *bond = netdev_priv(bond_dev); struct sockaddr *sa = addr; @@ -1895,14 +1746,12 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) } else { alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr); - read_lock(&bond->lock); alb_send_learning_packets(curr_active, bond_dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform clients mac address has changed */ rlb_req_update_slave_clients(bond, curr_active); } - read_unlock(&bond->lock); } return 0; diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h index aaeac61d03cf..1ad473b4ade5 100644 --- a/drivers/net/bonding/bond_alb.h +++ b/drivers/net/bonding/bond_alb.h @@ -139,24 +139,14 @@ struct tlb_slave_info { */ }; -struct tlb_up_slave { - unsigned int count; - struct rcu_head rcu; - struct slave *arr[0]; -}; - struct alb_bond_info { struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ - spinlock_t tx_hashtbl_lock; u32 unbalanced_load; int tx_rebalance_counter; int lp_counter; - /* -------- non-dynamic tlb mode only ---------*/ - struct tlb_up_slave __rcu *slave_arr; /* Up slaves */ /* -------- rlb parameters -------- */ int rlb_enabled; struct rlb_client_info *rx_hashtbl; /* Receive hash table */ - spinlock_t rx_hashtbl_lock; u32 rx_hashtbl_used_head; u8 rx_ntt; /* flag - need to transmit * to all rx clients diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c index 280971b227ea..8f99082f90eb 100644 --- a/drivers/net/bonding/bond_debugfs.c +++ b/drivers/net/bonding/bond_debugfs.c @@ -13,9 +13,7 @@ static struct dentry *bonding_debug_root; -/* - * Show RLB hash table - */ +/* Show RLB hash table */ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v) { struct bonding *bond = m->private; @@ -29,7 +27,7 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v) seq_printf(m, "SourceIP DestinationIP " "Destination MAC DEV\n"); - spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); + spin_lock_bh(&bond->mode_lock); hash_index = bond_info->rx_hashtbl_used_head; for (; hash_index != RLB_NULL_INDEX; @@ -42,7 +40,7 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v) client_info->slave->dev->name); } - spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); + spin_unlock_bh(&bond->mode_lock); return 0; } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 798ae69fb63c..c9ac06cfe6b7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -208,6 +208,9 @@ static int lacp_fast; static int bond_init(struct net_device *bond_dev); static void bond_uninit(struct net_device *bond_dev); +static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, + struct rtnl_link_stats64 *stats); +static void bond_slave_arr_handler(struct work_struct *work); /*---------------------------- General routines -----------------------------*/ @@ -253,8 +256,7 @@ void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, dev_queue_xmit(skb); } -/* - * In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, +/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, * We don't protect the slave list iteration with a lock because: * a. This operation is performed in IOCTL context, * b. The operation is protected by the RTNL semaphore in the 8021q code, @@ -326,8 +328,7 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, /*------------------------------- Link status -------------------------------*/ -/* - * Set the carrier state for the master according to the state of its +/* Set the carrier state for the master according to the state of its * slaves. If any slaves are up, the master is up. In 802.3ad mode, * do special 802.3ad magic. * @@ -362,8 +363,7 @@ down: return 0; } -/* - * Get link speed and duplex from the slave's base driver +/* Get link speed and duplex from the slave's base driver * using ethtool. If for some reason the call fails or the * values are invalid, set speed and duplex to -1, * and return. @@ -416,8 +416,7 @@ const char *bond_slave_link_status(s8 link) } } -/* - * if <dev> supports MII link status reporting, check its link status. +/* if <dev> supports MII link status reporting, check its link status. * * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), * depending upon the setting of the use_carrier parameter. @@ -454,14 +453,14 @@ static int bond_check_dev_link(struct bonding *bond, /* Ethtool can't be used, fallback to MII ioctls. */ ioctl = slave_ops->ndo_do_ioctl; if (ioctl) { - /* TODO: set pointer to correct ioctl on a per team member */ - /* bases to make this more efficient. that is, once */ - /* we determine the correct ioctl, we will always */ - /* call it and not the others for that team */ - /* member. */ - - /* - * We cannot assume that SIOCGMIIPHY will also read a + /* TODO: set pointer to correct ioctl on a per team member + * bases to make this more efficient. that is, once + * we determine the correct ioctl, we will always + * call it and not the others for that team + * member. + */ + + /* We cannot assume that SIOCGMIIPHY will also read a * register; not all network drivers (e.g., e100) * support that. */ @@ -476,8 +475,7 @@ static int bond_check_dev_link(struct bonding *bond, } } - /* - * If reporting, report that either there's no dev->do_ioctl, + /* If reporting, report that either there's no dev->do_ioctl, * or both SIOCGMIIREG and get_link failed (meaning that we * cannot report link status). If not reporting, pretend * we're ok. @@ -487,9 +485,7 @@ static int bond_check_dev_link(struct bonding *bond, /*----------------------------- Multicast list ------------------------------*/ -/* - * Push the promiscuity flag down to appropriate slaves - */ +/* Push the promiscuity flag down to appropriate slaves */ static int bond_set_promiscuity(struct bonding *bond, int inc) { struct list_head *iter; @@ -512,9 +508,7 @@ static int bond_set_promiscuity(struct bonding *bond, int inc) return err; } -/* - * Push the allmulti flag down to all slaves - */ +/* Push the allmulti flag down to all slaves */ static int bond_set_allmulti(struct bonding *bond, int inc) { struct list_head *iter; @@ -537,8 +531,7 @@ static int bond_set_allmulti(struct bonding *bond, int inc) return err; } -/* - * Retrieve the list of registered multicast addresses for the bonding +/* Retrieve the list of registered multicast addresses for the bonding * device and retransmit an IGMP JOIN request to the current active * slave. */ @@ -560,8 +553,7 @@ static void bond_resend_igmp_join_requests_delayed(struct work_struct *work) rtnl_unlock(); } -/* Flush bond's hardware addresses from slave - */ +/* Flush bond's hardware addresses from slave */ static void bond_hw_addr_flush(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -588,8 +580,6 @@ static void bond_hw_addr_flush(struct net_device *bond_dev, static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active) { - ASSERT_RTNL(); - if (old_active) { if (bond->dev->flags & IFF_PROMISC) dev_set_promiscuity(old_active->dev, -1); @@ -632,18 +622,15 @@ static void bond_set_dev_addr(struct net_device *bond_dev, call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); } -/* - * bond_do_fail_over_mac +/* bond_do_fail_over_mac * * Perform special MAC address swapping for fail_over_mac settings * - * Called with RTNL, curr_slave_lock for write_bh. + * Called with RTNL */ static void bond_do_fail_over_mac(struct bonding *bond, struct slave *new_active, struct slave *old_active) - __releases(&bond->curr_slave_lock) - __acquires(&bond->curr_slave_lock) { u8 tmp_mac[ETH_ALEN]; struct sockaddr saddr; @@ -651,23 +638,17 @@ static void bond_do_fail_over_mac(struct bonding *bond, switch (bond->params.fail_over_mac) { case BOND_FOM_ACTIVE: - if (new_active) { - write_unlock_bh(&bond->curr_slave_lock); + if (new_active) bond_set_dev_addr(bond->dev, new_active->dev); - write_lock_bh(&bond->curr_slave_lock); - } break; case BOND_FOM_FOLLOW: - /* - * if new_active && old_active, swap them + /* if new_active && old_active, swap them * if just old_active, do nothing (going to no active slave) * if just new_active, set new_active to bond's MAC */ if (!new_active) return; - write_unlock_bh(&bond->curr_slave_lock); - if (old_active) { ether_addr_copy(tmp_mac, new_active->dev->dev_addr); ether_addr_copy(saddr.sa_data, @@ -696,7 +677,6 @@ static void bond_do_fail_over_mac(struct bonding *bond, netdev_err(bond->dev, "Error %d setting MAC of slave %s\n", -rv, new_active->dev->name); out: - write_lock_bh(&bond->curr_slave_lock); break; default: netdev_err(bond->dev, "bond_do_fail_over_mac impossible: bad policy %d\n", @@ -708,8 +688,8 @@ out: static bool bond_should_change_active(struct bonding *bond) { - struct slave *prim = bond->primary_slave; - struct slave *curr = bond_deref_active_protected(bond); + struct slave *prim = rtnl_dereference(bond->primary_slave); + struct slave *curr = rtnl_dereference(bond->curr_active_slave); if (!prim || !curr || curr->link != BOND_LINK_UP) return true; @@ -732,13 +712,14 @@ static bool bond_should_change_active(struct bonding *bond) */ static struct slave *bond_find_best_slave(struct bonding *bond) { - struct slave *slave, *bestslave = NULL; + struct slave *slave, *bestslave = NULL, *primary; struct list_head *iter; int mintime = bond->params.updelay; - if (bond->primary_slave && bond->primary_slave->link == BOND_LINK_UP && + primary = rtnl_dereference(bond->primary_slave); + if (primary && primary->link == BOND_LINK_UP && bond_should_change_active(bond)) - return bond->primary_slave; + return primary; bond_for_each_slave(bond, slave, iter) { if (slave->link == BOND_LINK_UP) @@ -784,15 +765,15 @@ static bool bond_should_notify_peers(struct bonding *bond) * because it is apparently the best available slave we have, even though its * updelay hasn't timed out yet. * - * If new_active is not NULL, caller must hold curr_slave_lock for write_bh. + * Caller must hold RTNL. */ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) { struct slave *old_active; - old_active = rcu_dereference_protected(bond->curr_active_slave, - !new_active || - lockdep_is_held(&bond->curr_slave_lock)); + ASSERT_RTNL(); + + old_active = rtnl_dereference(bond->curr_active_slave); if (old_active == new_active) return; @@ -860,21 +841,18 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) bond_should_notify_peers(bond); } - write_unlock_bh(&bond->curr_slave_lock); - call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); if (should_notify_peers) call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); - - write_lock_bh(&bond->curr_slave_lock); } } /* resend IGMP joins since active slave has changed or * all were sent on curr_active_slave. * resend only if bond is brought up with the affected - * bonding modes and the retransmission is enabled */ + * bonding modes and the retransmission is enabled + */ if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) && ((bond_uses_primary(bond) && new_active) || BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)) { @@ -892,15 +870,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) * - The primary_slave has got its link back. * - A slave has got its link back and there's no old curr_active_slave. * - * Caller must hold curr_slave_lock for write_bh. + * Caller must hold RTNL. */ void bond_select_active_slave(struct bonding *bond) { struct slave *best_slave; int rv; + ASSERT_RTNL(); + best_slave = bond_find_best_slave(bond); - if (best_slave != bond_deref_active_protected(bond)) { + if (best_slave != rtnl_dereference(bond->curr_active_slave)) { bond_change_active_slave(bond, best_slave); rv = bond_set_carrier(bond); if (!rv) @@ -1022,7 +1002,8 @@ static netdev_features_t bond_fix_features(struct net_device *dev, static void bond_compute_features(struct bonding *bond) { - unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE; + unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | + IFF_XMIT_DST_RELEASE_PERM; netdev_features_t vlan_features = BOND_VLAN_FEATURES; netdev_features_t enc_features = BOND_ENC_FEATURES; struct net_device *bond_dev = bond->dev; @@ -1058,8 +1039,10 @@ done: bond_dev->gso_max_segs = gso_max_segs; netif_set_gso_max_size(bond_dev, gso_max_size); - flags = bond_dev->priv_flags & ~IFF_XMIT_DST_RELEASE; - bond_dev->priv_flags = flags | dst_release_flag; + bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + if ((bond_dev->priv_flags & IFF_XMIT_DST_RELEASE_PERM) && + dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM)) + bond_dev->priv_flags |= IFF_XMIT_DST_RELEASE; netdev_change_features(bond_dev); } @@ -1240,8 +1223,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) slave_dev->name); } - /* - * Old ifenslave binaries are no longer supported. These can + /* Old ifenslave binaries are no longer supported. These can * be identified with moderate accuracy by the state of the slave: * the current ifenslave will set the interface down prior to * enslaving it; the old ifenslave will not. @@ -1313,7 +1295,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) call_netdevice_notifiers(NETDEV_JOIN, slave_dev); /* If this is the first slave, then we need to set the master's hardware - * address to be the same as the slave's. */ + * address to be the same as the slave's. + */ if (!bond_has_slaves(bond) && bond->dev->addr_assign_type == NET_ADDR_RANDOM) bond_set_dev_addr(bond->dev, slave_dev); @@ -1326,8 +1309,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) new_slave->bond = bond; new_slave->dev = slave_dev; - /* - * Set the new_slave's queue_id to be zero. Queue ID mapping + /* Set the new_slave's queue_id to be zero. Queue ID mapping * is set via sysfs or module option if desired. */ new_slave->queue_id = 0; @@ -1340,8 +1322,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_free; } - /* - * Save slave's original ("permanent") mac address for modes + /* Save slave's original ("permanent") mac address for modes * that need it, and for restoring it upon release, and then * set it to the master's address */ @@ -1349,8 +1330,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (!bond->params.fail_over_mac || BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { - /* - * Set slave to master's mac address. The application already + /* Set slave to master's mac address. The application already * set the master's mac address to that of the first slave */ memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); @@ -1370,6 +1350,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } slave_dev->priv_flags |= IFF_BONDING; + /* initialize slave stats */ + dev_get_stats(new_slave->dev, &new_slave->slave_stats); if (bond_is_lb(bond)) { /* bond_alb_init_slave() must be called before all other stages since @@ -1436,8 +1418,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) link_reporting = bond_check_dev_link(bond, slave_dev, 1); if ((link_reporting == -1) && !bond->params.arp_interval) { - /* - * miimon is set but a bonded network driver + /* miimon is set but a bonded network driver * does not support ETHTOOL/MII and * arp_interval is not set. Note: if * use_carrier is enabled, we will never go @@ -1482,7 +1463,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (bond_uses_primary(bond) && bond->params.primary[0]) { /* if there is a primary slave, remember it */ if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { - bond->primary_slave = new_slave; + rcu_assign_pointer(bond->primary_slave, new_slave); bond->force_primary = true; } } @@ -1570,12 +1551,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (bond_uses_primary(bond)) { block_netpoll_tx(); - write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); - write_unlock_bh(&bond->curr_slave_lock); unblock_netpoll_tx(); } + if (bond_mode_uses_xmit_hash(bond)) + bond_update_slave_arr(bond, NULL); + netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n", slave_dev->name, bond_is_active_slave(new_slave) ? "an active" : "a backup", @@ -1596,16 +1578,16 @@ err_detach: bond_hw_addr_flush(bond_dev, slave_dev); vlan_vids_del_by_dev(slave_dev, bond_dev); - if (bond->primary_slave == new_slave) - bond->primary_slave = NULL; + if (rcu_access_pointer(bond->primary_slave) == new_slave) + RCU_INIT_POINTER(bond->primary_slave, NULL); if (rcu_access_pointer(bond->curr_active_slave) == new_slave) { block_netpoll_tx(); - write_lock_bh(&bond->curr_slave_lock); bond_change_active_slave(bond, NULL); bond_select_active_slave(bond); - write_unlock_bh(&bond->curr_slave_lock); unblock_netpoll_tx(); } + /* either primary_slave or curr_active_slave might've changed */ + synchronize_rcu(); slave_disable_netpoll(new_slave); err_close: @@ -1639,10 +1621,9 @@ err_undo_flags: return res; } -/* - * Try to release the slave device <slave> from the bond device <master> +/* Try to release the slave device <slave> from the bond device <master> * It is legal to access curr_active_slave without a lock because all the function - * is write-locked. If "all" is true it means that the function is being called + * is RTNL-locked. If "all" is true it means that the function is being called * while destroying a bond interface and all slaves are being released. * * The rules for slave state should be: @@ -1682,18 +1663,20 @@ static int __bond_release_one(struct net_device *bond_dev, bond_sysfs_slave_del(slave); + /* recompute stats just before removing the slave */ + bond_get_stats(bond->dev, &bond->bond_stats); + bond_upper_dev_unlink(bond_dev, slave_dev); /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. */ netdev_rx_handler_unregister(slave_dev); - write_lock_bh(&bond->lock); - /* Inform AD package of unbinding of slave. */ if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_unbind_slave(slave); - write_unlock_bh(&bond->lock); + if (bond_mode_uses_xmit_hash(bond)) + bond_update_slave_arr(bond, slave); netdev_info(bond_dev, "Releasing %s interface %s\n", bond_is_active_slave(slave) ? "active" : "backup", @@ -1712,14 +1695,11 @@ static int __bond_release_one(struct net_device *bond_dev, bond_dev->name, slave_dev->name); } - if (bond->primary_slave == slave) - bond->primary_slave = NULL; + if (rtnl_dereference(bond->primary_slave) == slave) + RCU_INIT_POINTER(bond->primary_slave, NULL); - if (oldcurrent == slave) { - write_lock_bh(&bond->curr_slave_lock); + if (oldcurrent == slave) bond_change_active_slave(bond, NULL); - write_unlock_bh(&bond->curr_slave_lock); - } if (bond_is_lb(bond)) { /* Must be called only after the slave has been @@ -1733,16 +1713,11 @@ static int __bond_release_one(struct net_device *bond_dev, if (all) { RCU_INIT_POINTER(bond->curr_active_slave, NULL); } else if (oldcurrent == slave) { - /* - * Note that we hold RTNL over this sequence, so there + /* Note that we hold RTNL over this sequence, so there * is no concern that another slave add/remove event * will interfere. */ - write_lock_bh(&bond->curr_slave_lock); - bond_select_active_slave(bond); - - write_unlock_bh(&bond->curr_slave_lock); } if (!bond_has_slaves(bond)) { @@ -1765,10 +1740,9 @@ static int __bond_release_one(struct net_device *bond_dev, netdev_info(bond_dev, "last VLAN challenged slave %s left bond %s - VLAN blocking is removed\n", slave_dev->name, bond_dev->name); - /* must do this from outside any spinlocks */ vlan_vids_del_by_dev(slave_dev, bond_dev); - /* If the mode uses primary, then this cases was handled above by + /* If the mode uses primary, then this case was handled above by * bond_change_active_slave(..., NULL) */ if (!bond_uses_primary(bond)) { @@ -1808,7 +1782,7 @@ static int __bond_release_one(struct net_device *bond_dev, bond_free_slave(slave); - return 0; /* deletion OK */ + return 0; } /* A wrapper used because of ndo_del_link */ @@ -1817,10 +1791,9 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) return __bond_release_one(bond_dev, slave_dev, false); } -/* -* First release a slave and then destroy the bond if no more slaves are left. -* Must be under rtnl_lock when this function is called. -*/ +/* First release a slave and then destroy the bond if no more slaves are left. + * Must be under rtnl_lock when this function is called. + */ static int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1843,7 +1816,6 @@ static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) info->bond_mode = BOND_MODE(bond); info->miimon = bond->params.miimon; - info->num_slaves = bond->slave_cnt; return 0; @@ -1906,9 +1878,7 @@ static int bond_miimon_inspect(struct bonding *bond) /*FALLTHRU*/ case BOND_LINK_FAIL: if (link_state) { - /* - * recovered before downdelay expired - */ + /* recovered before downdelay expired */ slave->link = BOND_LINK_UP; slave->last_link_up = jiffies; netdev_info(bond->dev, "link status up again after %d ms for interface %s\n", @@ -1974,7 +1944,7 @@ static int bond_miimon_inspect(struct bonding *bond) static void bond_miimon_commit(struct bonding *bond) { struct list_head *iter; - struct slave *slave; + struct slave *slave, *primary; bond_for_each_slave(bond, slave, iter) { switch (slave->new_link) { @@ -1985,13 +1955,14 @@ static void bond_miimon_commit(struct bonding *bond) slave->link = BOND_LINK_UP; slave->last_link_up = jiffies; + primary = rtnl_dereference(bond->primary_slave); if (BOND_MODE(bond) == BOND_MODE_8023AD) { /* prevent it from being the active one */ bond_set_backup_slave(slave); } else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { /* make it immediately active */ bond_set_active_slave(slave); - } else if (slave != bond->primary_slave) { + } else if (slave != primary) { /* prevent it from being the active one */ bond_set_backup_slave(slave); } @@ -2009,8 +1980,10 @@ static void bond_miimon_commit(struct bonding *bond) bond_alb_handle_link_change(bond, slave, BOND_LINK_UP); - if (!bond->curr_active_slave || - (slave == bond->primary_slave)) + if (BOND_MODE(bond) == BOND_MODE_XOR) + bond_update_slave_arr(bond, NULL); + + if (!bond->curr_active_slave || slave == primary) goto do_failover; continue; @@ -2037,6 +2010,9 @@ static void bond_miimon_commit(struct bonding *bond) bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN); + if (BOND_MODE(bond) == BOND_MODE_XOR) + bond_update_slave_arr(bond, NULL); + if (slave == rcu_access_pointer(bond->curr_active_slave)) goto do_failover; @@ -2051,19 +2027,15 @@ static void bond_miimon_commit(struct bonding *bond) } do_failover: - ASSERT_RTNL(); block_netpoll_tx(); - write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); - write_unlock_bh(&bond->curr_slave_lock); unblock_netpoll_tx(); } bond_set_carrier(bond); } -/* - * bond_mii_monitor +/* bond_mii_monitor * * Really a wrapper that splits the mii monitor into two phases: an * inspection, then (if inspection indicates something needs to be done) @@ -2135,8 +2107,7 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip) return ret; } -/* - * We go to the (large) trouble of VLAN tagging ARP frames because +/* We go to the (large) trouble of VLAN tagging ARP frames because * switches in VLAN mode (especially if ports are configured as * "native" to a VLAN) might not pass non-tagged frames. */ @@ -2363,8 +2334,7 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, curr_active_slave = rcu_dereference(bond->curr_active_slave); - /* - * Backup slaves won't see the ARP reply, but do come through + /* Backup slaves won't see the ARP reply, but do come through * here for each ARP probe (so we swap the sip/tip to validate * the probe). In a "redundant switch, common router" type of * configuration, the ARP probe will (hopefully) travel from @@ -2404,8 +2374,7 @@ static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, last_act + mod * delta_in_ticks + delta_in_ticks/2); } -/* - * this function is called regularly to monitor each slave's link +/* This function is called regularly to monitor each slave's link * ensuring that traffic is being sent and received when arp monitoring * is used in load-balancing mode. if the adapter has been dormant, then an * arp is transmitted to generate traffic. see activebackup_arp_monitor for @@ -2500,16 +2469,11 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) if (slave_state_changed) { bond_slave_state_change(bond); + if (BOND_MODE(bond) == BOND_MODE_XOR) + bond_update_slave_arr(bond, NULL); } else if (do_failover) { - /* the bond_select_active_slave must hold RTNL - * and curr_slave_lock for write. - */ block_netpoll_tx(); - write_lock_bh(&bond->curr_slave_lock); - bond_select_active_slave(bond); - - write_unlock_bh(&bond->curr_slave_lock); unblock_netpoll_tx(); } rtnl_unlock(); @@ -2521,13 +2485,12 @@ re_arm: msecs_to_jiffies(bond->params.arp_interval)); } -/* - * Called to inspect slaves for active-backup mode ARP monitor link state +/* Called to inspect slaves for active-backup mode ARP monitor link state * changes. Sets new_link in slaves to specify what action should take * place for the slave. Returns 0 if no changes are found, >0 if changes * to link states must be committed. * - * Called with rcu_read_lock hold. + * Called with rcu_read_lock held. */ static int bond_ab_arp_inspect(struct bonding *bond) { @@ -2548,16 +2511,14 @@ static int bond_ab_arp_inspect(struct bonding *bond) continue; } - /* - * Give slaves 2*delta after being enslaved or made + /* Give slaves 2*delta after being enslaved or made * active. This avoids bouncing, as the last receive * times need a full ARP monitor cycle to be updated. */ if (bond_time_in_interval(bond, slave->last_link_up, 2)) continue; - /* - * Backup slave is down if: + /* Backup slave is down if: * - No current_arp_slave AND * - more than 3*delta since last receive AND * - the bond has an IP address @@ -2576,8 +2537,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) commit++; } - /* - * Active slave is down if: + /* Active slave is down if: * - more than 2*delta since transmitting OR * - (more than 2*delta since receive AND * the bond has an IP address) @@ -2594,8 +2554,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) return commit; } -/* - * Called to commit link state changes noted by inspection step of +/* Called to commit link state changes noted by inspection step of * active-backup mode ARP monitor. * * Called with RTNL hold. @@ -2631,7 +2590,7 @@ static void bond_ab_arp_commit(struct bonding *bond) slave->dev->name); if (!rtnl_dereference(bond->curr_active_slave) || - (slave == bond->primary_slave)) + slave == rtnl_dereference(bond->primary_slave)) goto do_failover; } @@ -2663,21 +2622,17 @@ static void bond_ab_arp_commit(struct bonding *bond) } do_failover: - ASSERT_RTNL(); block_netpoll_tx(); - write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); - write_unlock_bh(&bond->curr_slave_lock); unblock_netpoll_tx(); } bond_set_carrier(bond); } -/* - * Send ARP probes for active-backup mode ARP monitor. +/* Send ARP probes for active-backup mode ARP monitor. * - * Called with rcu_read_lock hold. + * Called with rcu_read_lock held. */ static bool bond_ab_arp_probe(struct bonding *bond) { @@ -2817,9 +2772,7 @@ re_arm: /*-------------------------- netdev event handling --------------------------*/ -/* - * Change device name - */ +/* Change device name */ static int bond_event_changename(struct bonding *bond) { bond_remove_proc_entry(bond); @@ -2858,7 +2811,7 @@ static int bond_master_netdev_event(unsigned long event, static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) { - struct slave *slave = bond_slave_get_rtnl(slave_dev); + struct slave *slave = bond_slave_get_rtnl(slave_dev), *primary; struct bonding *bond; struct net_device *bond_dev; u32 old_speed; @@ -2872,6 +2825,7 @@ static int bond_slave_netdev_event(unsigned long event, return NOTIFY_DONE; bond_dev = slave->bond->dev; bond = slave->bond; + primary = rtnl_dereference(bond->primary_slave); switch (event) { case NETDEV_UNREGISTER: @@ -2893,15 +2847,23 @@ static int bond_slave_netdev_event(unsigned long event, if (old_duplex != slave->duplex) bond_3ad_adapter_duplex_changed(slave); } + /* Refresh slave-array if applicable! + * If the setup does not use miimon or arpmon (mode-specific!), + * then these events will not cause the slave-array to be + * refreshed. This will cause xmit to use a slave that is not + * usable. Avoid such situation by refeshing the array at these + * events. If these (miimon/arpmon) parameters are configured + * then array gets refreshed twice and that should be fine! + */ + if (bond_mode_uses_xmit_hash(bond)) + bond_update_slave_arr(bond, NULL); break; case NETDEV_DOWN: - /* - * ... Or is it this? - */ + if (bond_mode_uses_xmit_hash(bond)) + bond_update_slave_arr(bond, NULL); break; case NETDEV_CHANGEMTU: - /* - * TODO: Should slaves be allowed to + /* TODO: Should slaves be allowed to * independently alter their MTU? For * an active-backup bond, slaves need * not be the same type of device, so @@ -2919,23 +2881,21 @@ static int bond_slave_netdev_event(unsigned long event, !bond->params.primary[0]) break; - if (slave == bond->primary_slave) { + if (slave == primary) { /* slave's name changed - he's no longer primary */ - bond->primary_slave = NULL; + RCU_INIT_POINTER(bond->primary_slave, NULL); } else if (!strcmp(slave_dev->name, bond->params.primary)) { /* we have a new primary slave */ - bond->primary_slave = slave; + rcu_assign_pointer(bond->primary_slave, slave); } else { /* we didn't change primary - exit */ break; } netdev_info(bond->dev, "Primary slave changed to %s, reselecting active slave\n", - bond->primary_slave ? slave_dev->name : "none"); + primary ? slave_dev->name : "none"); block_netpoll_tx(); - write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); - write_unlock_bh(&bond->curr_slave_lock); unblock_netpoll_tx(); break; case NETDEV_FEAT_CHANGE: @@ -2952,8 +2912,7 @@ static int bond_slave_netdev_event(unsigned long event, return NOTIFY_DONE; } -/* - * bond_netdev_event: handle netdev notifier chain events. +/* bond_netdev_event: handle netdev notifier chain events. * * This function receives events for the netdev chain. The caller (an * ioctl handler calling blocking_notifier_call_chain) holds the necessary @@ -3081,6 +3040,7 @@ static void bond_work_init_all(struct bonding *bond) else INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon); INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); + INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler); } static void bond_work_cancel_all(struct bonding *bond) @@ -3090,6 +3050,7 @@ static void bond_work_cancel_all(struct bonding *bond) cancel_delayed_work_sync(&bond->alb_work); cancel_delayed_work_sync(&bond->ad_work); cancel_delayed_work_sync(&bond->mcast_work); + cancel_delayed_work_sync(&bond->slave_arr_work); } static int bond_open(struct net_device *bond_dev) @@ -3099,9 +3060,7 @@ static int bond_open(struct net_device *bond_dev) struct slave *slave; /* reset slave->backup and slave->inactive */ - read_lock(&bond->lock); if (bond_has_slaves(bond)) { - read_lock(&bond->curr_slave_lock); bond_for_each_slave(bond, slave, iter) { if (bond_uses_primary(bond) && slave != rcu_access_pointer(bond->curr_active_slave)) { @@ -3112,9 +3071,7 @@ static int bond_open(struct net_device *bond_dev) BOND_SLAVE_NOTIFY_NOW); } } - read_unlock(&bond->curr_slave_lock); } - read_unlock(&bond->lock); bond_work_init_all(bond); @@ -3143,6 +3100,9 @@ static int bond_open(struct net_device *bond_dev) bond_3ad_initiate_agg_selection(bond, 1); } + if (bond_mode_uses_xmit_hash(bond)) + bond_update_slave_arr(bond, NULL); + return 0; } @@ -3167,40 +3127,43 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, struct list_head *iter; struct slave *slave; - memset(stats, 0, sizeof(*stats)); + memcpy(stats, &bond->bond_stats, sizeof(*stats)); - read_lock_bh(&bond->lock); bond_for_each_slave(bond, slave, iter) { const struct rtnl_link_stats64 *sstats = dev_get_stats(slave->dev, &temp); + struct rtnl_link_stats64 *pstats = &slave->slave_stats; - stats->rx_packets += sstats->rx_packets; - stats->rx_bytes += sstats->rx_bytes; - stats->rx_errors += sstats->rx_errors; - stats->rx_dropped += sstats->rx_dropped; + stats->rx_packets += sstats->rx_packets - pstats->rx_packets; + stats->rx_bytes += sstats->rx_bytes - pstats->rx_bytes; + stats->rx_errors += sstats->rx_errors - pstats->rx_errors; + stats->rx_dropped += sstats->rx_dropped - pstats->rx_dropped; - stats->tx_packets += sstats->tx_packets; - stats->tx_bytes += sstats->tx_bytes; - stats->tx_errors += sstats->tx_errors; - stats->tx_dropped += sstats->tx_dropped; + stats->tx_packets += sstats->tx_packets - pstats->tx_packets;; + stats->tx_bytes += sstats->tx_bytes - pstats->tx_bytes; + stats->tx_errors += sstats->tx_errors - pstats->tx_errors; + stats->tx_dropped += sstats->tx_dropped - pstats->tx_dropped; - stats->multicast += sstats->multicast; - stats->collisions += sstats->collisions; + stats->multicast += sstats->multicast - pstats->multicast; + stats->collisions += sstats->collisions - pstats->collisions; - stats->rx_length_errors += sstats->rx_length_errors; - stats->rx_over_errors += sstats->rx_over_errors; - stats->rx_crc_errors += sstats->rx_crc_errors; - stats->rx_frame_errors += sstats->rx_frame_errors; - stats->rx_fifo_errors += sstats->rx_fifo_errors; - stats->rx_missed_errors += sstats->rx_missed_errors; + stats->rx_length_errors += sstats->rx_length_errors - pstats->rx_length_errors; + stats->rx_over_errors += sstats->rx_over_errors - pstats->rx_over_errors; + stats->rx_crc_errors += sstats->rx_crc_errors - pstats->rx_crc_errors; + stats->rx_frame_errors += sstats->rx_frame_errors - pstats->rx_frame_errors; + stats->rx_fifo_errors += sstats->rx_fifo_errors - pstats->rx_fifo_errors; + stats->rx_missed_errors += sstats->rx_missed_errors - pstats->rx_missed_errors; - stats->tx_aborted_errors += sstats->tx_aborted_errors; - stats->tx_carrier_errors += sstats->tx_carrier_errors; - stats->tx_fifo_errors += sstats->tx_fifo_errors; - stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; - stats->tx_window_errors += sstats->tx_window_errors; + stats->tx_aborted_errors += sstats->tx_aborted_errors - pstats->tx_aborted_errors; + stats->tx_carrier_errors += sstats->tx_carrier_errors - pstats->tx_carrier_errors; + stats->tx_fifo_errors += sstats->tx_fifo_errors - pstats->tx_fifo_errors; + stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors - pstats->tx_heartbeat_errors; + stats->tx_window_errors += sstats->tx_window_errors - pstats->tx_window_errors; + + /* save off the slave stats for the next run */ + memcpy(pstats, sstats, sizeof(*sstats)); } - read_unlock_bh(&bond->lock); + memcpy(&bond->bond_stats, stats, sizeof(*stats)); return stats; } @@ -3229,24 +3192,17 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd mii->phy_id = 0; /* Fall Through */ case SIOCGMIIREG: - /* - * We do this again just in case we were called by SIOCGMIIREG + /* We do this again just in case we were called by SIOCGMIIREG * instead of SIOCGMIIPHY. */ mii = if_mii(ifr); if (!mii) return -EINVAL; - if (mii->reg_num == 1) { mii->val_out = 0; - read_lock(&bond->lock); - read_lock(&bond->curr_slave_lock); if (netif_carrier_ok(bond->dev)) mii->val_out = BMSR_LSTATUS; - - read_unlock(&bond->curr_slave_lock); - read_unlock(&bond->lock); } return 0; @@ -3277,7 +3233,6 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd return res; default: - /* Go on */ break; } @@ -3339,7 +3294,6 @@ static void bond_set_rx_mode(struct net_device *bond_dev) struct list_head *iter; struct slave *slave; - rcu_read_lock(); if (bond_uses_primary(bond)) { slave = rcu_dereference(bond->curr_active_slave); @@ -3377,8 +3331,7 @@ static int bond_neigh_init(struct neighbour *n) if (ret) return ret; - /* - * Assign slave's neigh_cleanup to neighbour in case cleanup is called + /* Assign slave's neigh_cleanup to neighbour in case cleanup is called * after the last slave has been detached. Assumes that all slaves * utilize the same neigh_cleanup (true at this writing as only user * is ipoib). @@ -3391,8 +3344,7 @@ static int bond_neigh_init(struct neighbour *n) return parms.neigh_setup(n); } -/* - * The bonding ndo_neigh_setup is called at init time beofre any +/* The bonding ndo_neigh_setup is called at init time beofre any * slave exists. So we must declare proxy setup function which will * be used at run time to resolve the actual slave neigh param setup. * @@ -3410,9 +3362,7 @@ static int bond_neigh_setup(struct net_device *dev, return 0; } -/* - * Change the MTU of all of a master's slaves to match the master - */ +/* Change the MTU of all of a master's slaves to match the master */ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) { struct bonding *bond = netdev_priv(bond_dev); @@ -3422,21 +3372,6 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) netdev_dbg(bond_dev, "bond=%p, new_mtu=%d\n", bond, new_mtu); - /* Can't hold bond->lock with bh disabled here since - * some base drivers panic. On the other hand we can't - * hold bond->lock without bh disabled because we'll - * deadlock. The only solution is to rely on the fact - * that we're under rtnl_lock here, and the slaves - * list won't change. This doesn't solve the problem - * of setting the slave's MTU while it is - * transmitting, but the assumption is that the base - * driver can handle that. - * - * TODO: figure out a way to safely iterate the slaves - * list, but without holding a lock around the actual - * call to the base driver. - */ - bond_for_each_slave(bond, slave, iter) { netdev_dbg(bond_dev, "s %p c_m %p\n", slave, slave->dev->netdev_ops->ndo_change_mtu); @@ -3480,8 +3415,7 @@ unwind: return res; } -/* - * Change HW address +/* Change HW address * * Note that many devices must be down to change the HW address, and * downing the master releases all slaves. We can make bonds full of @@ -3511,21 +3445,6 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr) if (!is_valid_ether_addr(sa->sa_data)) return -EADDRNOTAVAIL; - /* Can't hold bond->lock with bh disabled here since - * some base drivers panic. On the other hand we can't - * hold bond->lock without bh disabled because we'll - * deadlock. The only solution is to rely on the fact - * that we're under rtnl_lock here, and the slaves - * list won't change. This doesn't solve the problem - * of setting the slave's hw address while it is - * transmitting, but the assumption is that the base - * driver can handle that. - * - * TODO: figure out a way to safely iterate the slaves - * list, but without holding a lock around the actual - * call to the base driver. - */ - bond_for_each_slave(bond, slave, iter) { netdev_dbg(bond_dev, "slave %p %s\n", slave, slave->dev->name); res = dev_set_mac_address(slave->dev, addr); @@ -3654,7 +3573,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev */ if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) { slave = rcu_dereference(bond->curr_active_slave); - if (slave && bond_slave_can_tx(slave)) + if (slave) bond_dev_queue_xmit(bond, skb, slave->dev); else bond_xmit_slave_id(bond, skb, 0); @@ -3672,8 +3591,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev return NETDEV_TX_OK; } -/* - * in active-backup mode, we know that bond->curr_active_slave is always valid if +/* In active-backup mode, we know that bond->curr_active_slave is always valid if * the bond has a usable interface. */ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev) @@ -3690,20 +3608,148 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d return NETDEV_TX_OK; } -/* In bond_xmit_xor() , we determine the output device by using a pre- - * determined xmit_hash_policy(), If the selected device is not enabled, - * find the next active slave. +/* Use this to update slave_array when (a) it's not appropriate to update + * slave_array right away (note that update_slave_array() may sleep) + * and / or (b) RTNL is not held. */ -static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) +void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay) { - struct bonding *bond = netdev_priv(bond_dev); - int slave_cnt = ACCESS_ONCE(bond->slave_cnt); + queue_delayed_work(bond->wq, &bond->slave_arr_work, delay); +} - if (likely(slave_cnt)) - bond_xmit_slave_id(bond, skb, - bond_xmit_hash(bond, skb) % slave_cnt); - else +/* Slave array work handler. Holds only RTNL */ +static void bond_slave_arr_handler(struct work_struct *work) +{ + struct bonding *bond = container_of(work, struct bonding, + slave_arr_work.work); + int ret; + + if (!rtnl_trylock()) + goto err; + + ret = bond_update_slave_arr(bond, NULL); + rtnl_unlock(); + if (ret) { + pr_warn_ratelimited("Failed to update slave array from WT\n"); + goto err; + } + return; + +err: + bond_slave_arr_work_rearm(bond, 1); +} + +/* Build the usable slaves array in control path for modes that use xmit-hash + * to determine the slave interface - + * (a) BOND_MODE_8023AD + * (b) BOND_MODE_XOR + * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0 + * + * The caller is expected to hold RTNL only and NO other lock! + */ +int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) +{ + struct slave *slave; + struct list_head *iter; + struct bond_up_slave *new_arr, *old_arr; + int slaves_in_agg; + int agg_id = 0; + int ret = 0; + +#ifdef CONFIG_LOCKDEP + WARN_ON(lockdep_is_held(&bond->mode_lock)); +#endif + + new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]), + GFP_KERNEL); + if (!new_arr) { + ret = -ENOMEM; + pr_err("Failed to build slave-array.\n"); + goto out; + } + if (BOND_MODE(bond) == BOND_MODE_8023AD) { + struct ad_info ad_info; + + if (bond_3ad_get_active_agg_info(bond, &ad_info)) { + pr_debug("bond_3ad_get_active_agg_info failed\n"); + kfree_rcu(new_arr, rcu); + /* No active aggragator means it's not safe to use + * the previous array. + */ + old_arr = rtnl_dereference(bond->slave_arr); + if (old_arr) { + RCU_INIT_POINTER(bond->slave_arr, NULL); + kfree_rcu(old_arr, rcu); + } + goto out; + } + slaves_in_agg = ad_info.ports; + agg_id = ad_info.aggregator_id; + } + bond_for_each_slave(bond, slave, iter) { + if (BOND_MODE(bond) == BOND_MODE_8023AD) { + struct aggregator *agg; + + agg = SLAVE_AD_INFO(slave)->port.aggregator; + if (!agg || agg->aggregator_identifier != agg_id) + continue; + } + if (!bond_slave_can_tx(slave)) + continue; + if (skipslave == slave) + continue; + new_arr->arr[new_arr->count++] = slave; + } + + old_arr = rtnl_dereference(bond->slave_arr); + rcu_assign_pointer(bond->slave_arr, new_arr); + if (old_arr) + kfree_rcu(old_arr, rcu); +out: + if (ret != 0 && skipslave) { + int idx; + + /* Rare situation where caller has asked to skip a specific + * slave but allocation failed (most likely!). BTW this is + * only possible when the call is initiated from + * __bond_release_one(). In this situation; overwrite the + * skipslave entry in the array with the last entry from the + * array to avoid a situation where the xmit path may choose + * this to-be-skipped slave to send a packet out. + */ + old_arr = rtnl_dereference(bond->slave_arr); + for (idx = 0; idx < old_arr->count; idx++) { + if (skipslave == old_arr->arr[idx]) { + old_arr->arr[idx] = + old_arr->arr[old_arr->count-1]; + old_arr->count--; + break; + } + } + } + return ret; +} + +/* Use this Xmit function for 3AD as well as XOR modes. The current + * usable slave array is formed in the control path. The xmit function + * just calculates hash and sends the packet out. + */ +int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct bonding *bond = netdev_priv(dev); + struct slave *slave; + struct bond_up_slave *slaves; + unsigned int count; + + slaves = rcu_dereference(bond->slave_arr); + count = slaves ? ACCESS_ONCE(slaves->count) : 0; + if (likely(count)) { + slave = slaves->arr[bond_xmit_hash(bond, skb) % count]; + bond_dev_queue_xmit(bond, skb, slave->dev); + } else { dev_kfree_skb_any(skb); + atomic_long_inc(&dev->tx_dropped); + } return NETDEV_TX_OK; } @@ -3726,7 +3772,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) bond_dev->name, __func__); continue; } - /* bond_dev_queue_xmit always returns 0 */ bond_dev_queue_xmit(bond, skb2, slave->dev); } } @@ -3740,9 +3785,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) /*------------------------- Device initialization ---------------------------*/ -/* - * Lookup the slave that corresponds to a qid - */ +/* Lookup the slave that corresponds to a qid */ static inline int bond_slave_override(struct bonding *bond, struct sk_buff *skb) { @@ -3771,17 +3814,14 @@ static inline int bond_slave_override(struct bonding *bond, static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { - /* - * This helper function exists to help dev_pick_tx get the correct + /* This helper function exists to help dev_pick_tx get the correct * destination queue. Using a helper function skips a call to * skb_tx_hash and will put the skbs in the queue we expect on their * way down to the bonding driver. */ u16 txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; - /* - * Save the original txq to restore before passing to the driver - */ + /* Save the original txq to restore before passing to the driver */ qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; if (unlikely(txq >= dev->real_num_tx_queues)) { @@ -3805,12 +3845,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev return bond_xmit_roundrobin(skb, dev); case BOND_MODE_ACTIVEBACKUP: return bond_xmit_activebackup(skb, dev); + case BOND_MODE_8023AD: case BOND_MODE_XOR: - return bond_xmit_xor(skb, dev); + return bond_3ad_xor_xmit(skb, dev); case BOND_MODE_BROADCAST: return bond_xmit_broadcast(skb, dev); - case BOND_MODE_8023AD: - return bond_3ad_xmit_xor(skb, dev); case BOND_MODE_ALB: return bond_alb_xmit(skb, dev); case BOND_MODE_TLB: @@ -3829,8 +3868,7 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) struct bonding *bond = netdev_priv(dev); netdev_tx_t ret = NETDEV_TX_OK; - /* - * If we risk deadlock from transmitting this in the + /* If we risk deadlock from transmitting this in the * netpoll path, tell netpoll to queue the frame for later tx */ if (unlikely(is_netpoll_tx_blocked(dev))) @@ -3862,7 +3900,6 @@ static int bond_ethtool_get_settings(struct net_device *bond_dev, * the true receive or transmit bandwidth (not all modes are symmetric) * this is an accurate maximum. */ - read_lock(&bond->lock); bond_for_each_slave(bond, slave, iter) { if (bond_slave_can_tx(slave)) { if (slave->speed != SPEED_UNKNOWN) @@ -3873,7 +3910,6 @@ static int bond_ethtool_get_settings(struct net_device *bond_dev, } } ethtool_cmd_speed_set(ecmd, speed ? : SPEED_UNKNOWN); - read_unlock(&bond->lock); return 0; } @@ -3935,9 +3971,7 @@ void bond_setup(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - /* initialize rwlocks */ - rwlock_init(&bond->lock); - rwlock_init(&bond->curr_slave_lock); + spin_lock_init(&bond->mode_lock); bond->params = bonding_defaults; /* Initialize pointers */ @@ -3958,8 +3992,7 @@ void bond_setup(struct net_device *bond_dev) bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT; bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); - /* don't acquire bond device's netif_tx_lock when - * transmitting */ + /* don't acquire bond device's netif_tx_lock when transmitting */ bond_dev->features |= NETIF_F_LLTX; /* By default, we declare the bond to be fully @@ -3982,15 +4015,15 @@ void bond_setup(struct net_device *bond_dev) bond_dev->features |= bond_dev->hw_features; } -/* -* Destroy a bonding device. -* Must be under rtnl_lock when this function is called. -*/ +/* Destroy a bonding device. + * Must be under rtnl_lock when this function is called. + */ static void bond_uninit(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); struct list_head *iter; struct slave *slave; + struct bond_up_slave *arr; bond_netpoll_cleanup(bond_dev); @@ -3999,6 +4032,12 @@ static void bond_uninit(struct net_device *bond_dev) __bond_release_one(bond_dev, slave->dev, true); netdev_info(bond_dev, "Released all slaves\n"); + arr = rtnl_dereference(bond->slave_arr); + if (arr) { + RCU_INIT_POINTER(bond->slave_arr, NULL); + kfree_rcu(arr, rcu); + } + list_del(&bond->bond_list); bond_debug_unregister(bond); @@ -4013,9 +4052,7 @@ static int bond_check_params(struct bond_params *params) const struct bond_opt_value *valptr; int arp_all_targets_value; - /* - * Convert string parameters. - */ + /* Convert string parameters. */ if (mode) { bond_opt_initstr(&newval, mode); valptr = bond_opt_parse(bond_opt_get(BOND_OPT_MODE), &newval); @@ -4192,9 +4229,9 @@ static int bond_check_params(struct bond_params *params) for (arp_ip_count = 0, i = 0; (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) { - /* not complete check, but should be good enough to - catch mistakes */ __be32 ip; + + /* not a complete check, but good enough to catch mistakes */ if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) || !bond_is_ip_target_ok(ip)) { pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", @@ -4377,26 +4414,14 @@ static void bond_set_lockdep_class(struct net_device *dev) dev->qdisc_tx_busylock = &bonding_tx_busylock_key; } -/* - * Called from registration process - */ +/* Called from registration process */ static int bond_init(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); netdev_dbg(bond_dev, "Begin bond_init\n"); - /* - * Initialize locks that may be required during - * en/deslave operations. All of the bond_open work - * (of which this is part) should really be moved to - * a phase prior to dev_open - */ - spin_lock_init(&(bond_info->tx_hashtbl_lock)); - spin_lock_init(&(bond_info->rx_hashtbl_lock)); - bond->wq = create_singlethread_workqueue(bond_dev->name); if (!bond->wq) return -ENOMEM; @@ -4543,9 +4568,7 @@ static void __exit bonding_exit(void) unregister_pernet_subsys(&bond_net_ops); #ifdef CONFIG_NET_POLL_CONTROLLER - /* - * Make sure we don't have an imbalance on our netpoll blocking - */ + /* Make sure we don't have an imbalance on our netpoll blocking */ WARN_ON(atomic_read(&netpoll_block_tx)); #endif } diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index d163e112f04c..c13d83e15ace 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -96,6 +96,10 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, }; +static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { + [IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 }, +}; + static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) { if (tb[IFLA_ADDRESS]) { @@ -107,6 +111,33 @@ static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static int bond_slave_changelink(struct net_device *bond_dev, + struct net_device *slave_dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct bond_opt_value newval; + int err; + + if (!data) + return 0; + + if (data[IFLA_BOND_SLAVE_QUEUE_ID]) { + u16 queue_id = nla_get_u16(data[IFLA_BOND_SLAVE_QUEUE_ID]); + char queue_id_str[IFNAMSIZ + 7]; + + /* queue_id option setting expects slave_name:queue_id */ + snprintf(queue_id_str, sizeof(queue_id_str), "%s:%u\n", + slave_dev->name, queue_id); + bond_opt_initstr(&newval, queue_id_str); + err = __bond_opt_set(bond, BOND_OPT_QUEUE_ID, &newval); + if (err) + return err; + } + + return 0; +} + static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], struct nlattr *data[]) { @@ -412,6 +443,7 @@ static int bond_fill_info(struct sk_buff *skb, unsigned int packets_per_slave; int ifindex, i, targets_added; struct nlattr *targets; + struct slave *primary; if (nla_put_u8(skb, IFLA_BOND_MODE, BOND_MODE(bond))) goto nla_put_failure; @@ -461,9 +493,9 @@ static int bond_fill_info(struct sk_buff *skb, bond->params.arp_all_targets)) goto nla_put_failure; - if (bond->primary_slave && - nla_put_u32(skb, IFLA_BOND_PRIMARY, - bond->primary_slave->dev->ifindex)) + primary = rtnl_dereference(bond->primary_slave); + if (primary && + nla_put_u32(skb, IFLA_BOND_PRIMARY, primary->dev->ifindex)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_PRIMARY_RESELECT, @@ -562,6 +594,9 @@ struct rtnl_link_ops bond_link_ops __read_mostly = { .get_num_tx_queues = bond_get_num_tx_queues, .get_num_rx_queues = bond_get_num_tx_queues, /* Use the same number as for TX queues */ + .slave_maxtype = IFLA_BOND_SLAVE_MAX, + .slave_policy = bond_slave_policy, + .slave_changelink = bond_slave_changelink, .get_slave_size = bond_get_slave_size, .fill_slave_info = bond_fill_slave_info, }; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index dc73463c2c23..b62697f4a3de 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -625,6 +625,8 @@ int __bond_opt_set(struct bonding *bond, out: if (ret) bond_opt_error_interpret(bond, opt, ret, val); + else + call_netdevice_notifiers(NETDEV_CHANGEINFODATA, bond->dev); return ret; } @@ -732,15 +734,13 @@ static int bond_option_active_slave_set(struct bonding *bond, } block_netpoll_tx(); - write_lock_bh(&bond->curr_slave_lock); - /* check to see if we are clearing active */ if (!slave_dev) { netdev_info(bond->dev, "Clearing current active slave\n"); RCU_INIT_POINTER(bond->curr_active_slave, NULL); bond_select_active_slave(bond); } else { - struct slave *old_active = bond_deref_active_protected(bond); + struct slave *old_active = rtnl_dereference(bond->curr_active_slave); struct slave *new_active = bond_slave_get_rtnl(slave_dev); BUG_ON(!new_active); @@ -763,8 +763,6 @@ static int bond_option_active_slave_set(struct bonding *bond, } } } - - write_unlock_bh(&bond->curr_slave_lock); unblock_netpoll_tx(); return ret; @@ -953,14 +951,7 @@ static int _bond_option_arp_ip_target_add(struct bonding *bond, __be32 target) static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target) { - int ret; - - /* not to race with bond_arp_rcv */ - write_lock_bh(&bond->lock); - ret = _bond_option_arp_ip_target_add(bond, target); - write_unlock_bh(&bond->lock); - - return ret; + return _bond_option_arp_ip_target_add(bond, target); } static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target) @@ -989,9 +980,6 @@ static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target) netdev_info(bond->dev, "Removing ARP target %pI4\n", &target); - /* not to race with bond_arp_rcv */ - write_lock_bh(&bond->lock); - bond_for_each_slave(bond, slave, iter) { targets_rx = slave->target_last_arp_rx; for (i = ind; (i < BOND_MAX_ARP_TARGETS-1) && targets[i+1]; i++) @@ -1002,8 +990,6 @@ static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target) targets[i] = targets[i+1]; targets[i] = 0; - write_unlock_bh(&bond->lock); - return 0; } @@ -1011,11 +997,8 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond) { int i; - /* not to race with bond_arp_rcv */ - write_lock_bh(&bond->lock); for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) _bond_options_arp_ip_target_set(bond, i, 0, 0); - write_unlock_bh(&bond->lock); } static int bond_option_arp_ip_targets_set(struct bonding *bond, @@ -1079,8 +1062,6 @@ static int bond_option_primary_set(struct bonding *bond, struct slave *slave; block_netpoll_tx(); - read_lock(&bond->lock); - write_lock_bh(&bond->curr_slave_lock); p = strchr(primary, '\n'); if (p) @@ -1088,7 +1069,7 @@ static int bond_option_primary_set(struct bonding *bond, /* check to see if we are clearing primary */ if (!strlen(primary)) { netdev_info(bond->dev, "Setting primary slave to None\n"); - bond->primary_slave = NULL; + RCU_INIT_POINTER(bond->primary_slave, NULL); memset(bond->params.primary, 0, sizeof(bond->params.primary)); bond_select_active_slave(bond); goto out; @@ -1098,16 +1079,16 @@ static int bond_option_primary_set(struct bonding *bond, if (strncmp(slave->dev->name, primary, IFNAMSIZ) == 0) { netdev_info(bond->dev, "Setting %s as primary slave\n", slave->dev->name); - bond->primary_slave = slave; + rcu_assign_pointer(bond->primary_slave, slave); strcpy(bond->params.primary, slave->dev->name); bond_select_active_slave(bond); goto out; } } - if (bond->primary_slave) { + if (rtnl_dereference(bond->primary_slave)) { netdev_info(bond->dev, "Setting primary slave to None\n"); - bond->primary_slave = NULL; + RCU_INIT_POINTER(bond->primary_slave, NULL); bond_select_active_slave(bond); } strncpy(bond->params.primary, primary, IFNAMSIZ); @@ -1117,8 +1098,6 @@ static int bond_option_primary_set(struct bonding *bond, primary, bond->dev->name); out: - write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); unblock_netpoll_tx(); return 0; @@ -1132,9 +1111,7 @@ static int bond_option_primary_reselect_set(struct bonding *bond, bond->params.primary_reselect = newval->value; block_netpoll_tx(); - write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); - write_unlock_bh(&bond->curr_slave_lock); unblock_netpoll_tx(); return 0; diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index de62c0385dfb..a3948f8d1e53 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -7,21 +7,18 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) - __acquires(&bond->lock) { struct bonding *bond = seq->private; struct list_head *iter; struct slave *slave; loff_t off = 0; - /* make sure the bond won't be taken away */ rcu_read_lock(); - read_lock(&bond->lock); if (*pos == 0) return SEQ_START_TOKEN; - bond_for_each_slave(bond, slave, iter) + bond_for_each_slave_rcu(bond, slave, iter) if (++off == *pos) return slave; @@ -37,12 +34,9 @@ static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; if (v == SEQ_START_TOKEN) - return bond_first_slave(bond); + return bond_first_slave_rcu(bond); - if (bond_is_last_slave(bond, v)) - return NULL; - - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { if (found) return slave; if (slave == v) @@ -53,12 +47,8 @@ static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void bond_info_seq_stop(struct seq_file *seq, void *v) - __releases(&bond->lock) __releases(RCU) { - struct bonding *bond = seq->private; - - read_unlock(&bond->lock); rcu_read_unlock(); } @@ -66,7 +56,7 @@ static void bond_info_show_master(struct seq_file *seq) { struct bonding *bond = seq->private; const struct bond_opt_value *optval; - struct slave *curr; + struct slave *curr, *primary; int i; curr = rcu_dereference(bond->curr_active_slave); @@ -83,8 +73,7 @@ static void bond_info_show_master(struct seq_file *seq) seq_printf(seq, "\n"); - if (BOND_MODE(bond) == BOND_MODE_XOR || - BOND_MODE(bond) == BOND_MODE_8023AD) { + if (bond_mode_uses_xmit_hash(bond)) { optval = bond_opt_get_val(BOND_OPT_XMIT_HASH, bond->params.xmit_policy); seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", @@ -92,10 +81,10 @@ static void bond_info_show_master(struct seq_file *seq) } if (bond_uses_primary(bond)) { + primary = rcu_dereference(bond->primary_slave); seq_printf(seq, "Primary Slave: %s", - (bond->primary_slave) ? - bond->primary_slave->dev->name : "None"); - if (bond->primary_slave) { + primary ? primary->dev->name : "None"); + if (primary) { optval = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT, bond->params.primary_reselect); seq_printf(seq, " (primary_reselect %s)", diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 98db8edd9c75..8ffbafd500fd 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -91,7 +91,6 @@ static struct net_device *bond_get_by_name(struct bond_net *bn, const char *ifna * creates and deletes entire bonds. * * The class parameter is ignored. - * */ static ssize_t bonding_store_bonds(struct class *cls, struct class_attribute *attr, @@ -425,11 +424,15 @@ static ssize_t bonding_show_primary(struct device *d, struct device_attribute *attr, char *buf) { - int count = 0; struct bonding *bond = to_bond(d); + struct slave *primary; + int count = 0; - if (bond->primary_slave) - count = sprintf(buf, "%s\n", bond->primary_slave->dev->name); + rcu_read_lock(); + primary = rcu_dereference(bond->primary_slave); + if (primary) + count = sprintf(buf, "%s\n", primary->dev->name); + rcu_read_unlock(); return count; } diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index aace510d08d1..10920f0686e2 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -24,6 +24,7 @@ #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/reciprocal_div.h> +#include <linux/if_link.h> #include "bond_3ad.h" #include "bond_alb.h" @@ -83,7 +84,7 @@ * @pos: current slave * @iter: list_head * iterator * - * Caller must hold bond->lock + * Caller must hold RTNL */ #define bond_for_each_slave(bond, pos, iter) \ netdev_for_each_lower_private((bond)->dev, pos, iter) @@ -175,6 +176,13 @@ struct slave { struct netpoll *np; #endif struct kobject kobj; + struct rtnl_link_stats64 slave_stats; +}; + +struct bond_up_slave { + unsigned int count; + struct rcu_head rcu; + struct slave *arr[0]; }; /* @@ -184,24 +192,26 @@ struct slave { /* * Here are the locking policies for the two bonding locks: - * - * 1) Get bond->lock when reading/writing slave list. - * 2) Get bond->curr_slave_lock when reading/writing bond->curr_active_slave. - * (It is unnecessary when the write-lock is put with bond->lock.) - * 3) When we lock with bond->curr_slave_lock, we must lock with bond->lock - * beforehand. + * Get rcu_read_lock when reading or RTNL when writing slave list. */ struct bonding { struct net_device *dev; /* first - useful for panic debug */ struct slave __rcu *curr_active_slave; struct slave __rcu *current_arp_slave; - struct slave *primary_slave; + struct slave __rcu *primary_slave; + struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */ bool force_primary; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ int (*recv_probe)(const struct sk_buff *, struct bonding *, struct slave *); - rwlock_t lock; - rwlock_t curr_slave_lock; + /* mode_lock is used for mode-specific locking needs, currently used by: + * 3ad mode (4) - protect against running bond_3ad_unbind_slave() and + * bond_3ad_state_machine_handler() concurrently and also + * the access to the state machine shared variables. + * TLB mode (5) - to sync the use and modifications of its hash table + * ALB mode (6) - to sync the use and modifications of its hash table + */ + spinlock_t mode_lock; u8 send_peer_notif; u8 igmp_retrans; #ifdef CONFIG_PROC_FS @@ -219,10 +229,12 @@ struct bonding { struct delayed_work alb_work; struct delayed_work ad_work; struct delayed_work mcast_work; + struct delayed_work slave_arr_work; #ifdef CONFIG_DEBUG_FS /* debugging support via debugfs */ struct dentry *debug_dir; #endif /* CONFIG_DEBUG_FS */ + struct rtnl_link_stats64 bond_stats; }; #define bond_slave_get_rcu(dev) \ @@ -231,10 +243,6 @@ struct bonding { #define bond_slave_get_rtnl(dev) \ ((struct slave *) rtnl_dereference(dev->rx_handler_data)) -#define bond_deref_active_protected(bond) \ - rcu_dereference_protected(bond->curr_active_slave, \ - lockdep_is_held(&bond->curr_slave_lock)) - struct bond_vlan_tag { __be16 vlan_proto; unsigned short vlan_id; @@ -274,6 +282,13 @@ static inline bool bond_is_nondyn_tlb(const struct bonding *bond) (bond->params.tlb_dynamic_lb == 0); } +static inline bool bond_mode_uses_xmit_hash(const struct bonding *bond) +{ + return (BOND_MODE(bond) == BOND_MODE_8023AD || + BOND_MODE(bond) == BOND_MODE_XOR || + bond_is_nondyn_tlb(bond)); +} + static inline bool bond_mode_uses_arp(int mode) { return mode != BOND_MODE_8023AD && mode != BOND_MODE_TLB && @@ -527,6 +542,8 @@ const char *bond_slave_link_status(s8 link); struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, struct net_device *end_dev, int level); +int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave); +void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay); #ifdef CONFIG_PROC_FS void bond_create_proc_entry(struct bonding *bond); |