From b0ce3508b25ea6fa10ae3ca254de1d695b521702 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 May 2013 07:34:53 +0000 Subject: bonding: allow TSO being set on bonding master MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some situations, we need to disable TSO on bonding slaves. bonding device automatically unset TSO in bond_fix_features(), and performance is not good because : 1) We consume more cpu cycles. 2) GSO segmentation has some bugs leading to out of order TCP packets if this segmentation is done before virtual device. This particular problem will be addressed in a separate patch. This patch allows TSO being set/unset on the bonding master, so that GSO segmentation is done after bonding layer. Signed-off-by: Eric Dumazet Cc: Michał Mirosław Cc: Jay Vosburgh Cc: Andy Gospodarek Cc: Maciej Żenczykowski Cc: Tom Herbert Cc: Neal Cardwell Cc: Yuchung Cheng Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/bonding/bond_main.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d0aade04e49a..449ad9bbe45c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1362,6 +1362,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev, slave->dev->features, mask); } + features = netdev_add_tso_features(features, mask); out: read_unlock(&bond->lock); -- cgit v1.2.3 From acca2674a71816c5c9d0caa81fecd33b491fd68f Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Sat, 18 May 2013 01:18:29 +0000 Subject: bonding: replace %x with %pI4 for IPv4 addresses There're few pr_debug() places that can provide the IPv4 address in dotted decimal format instead which is more helpful. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/bonding/bond_main.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 449ad9bbe45c..d4635987bda9 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2556,8 +2556,8 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ { struct sk_buff *skb; - pr_debug("arp %d on slave %s: dst %x src %x vid %d\n", arp_op, - slave_dev->name, dest_ip, src_ip, vlan_id); + pr_debug("arp %d on slave %s: dst %pI4 src %pI4 vid %d\n", arp_op, + slave_dev->name, &dest_ip, &src_ip, vlan_id); skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, NULL, slave_dev->dev_addr, NULL); @@ -2589,7 +2589,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) __be32 addr; if (!targets[i]) break; - pr_debug("basa: target %x\n", targets[i]); + pr_debug("basa: target %pI4\n", &targets[i]); if (!bond_vlan_used(bond)) { pr_debug("basa: empty vlan: arp_send\n"); addr = bond_confirm_addr(bond->dev, targets[i], 0); -- cgit v1.2.3 From 5a5c5fd48e3bcd57572e9a7a4964ed8f38a20b87 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Sat, 18 May 2013 01:18:30 +0000 Subject: bonding: arp_ip_count and arp_targets can be wrong When getting arp_ip_targets if we encounter a bad IP, arp_ip_count still gets increased and all the targets after the wrong one will not be probed if arp_interval is enabled after that (unless a new IP target is added through sysfs) because of the zero entry, in this case reading arp_ip_target through sysfs will show valid targets even if there's a zero entry. Example: 1.2.3.4,4.5.6.7,blah,5.6.7.8 When retrieving the list from arp_ip_target the output would be: 1.2.3.4,4.5.6.7,5.6.7.8 but there will be a 0 entry between 4.5.6.7 and 5.6.7.8. If arp_interval is enabled after that 5.6.7.8 will never be checked because of that. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'drivers/net/bonding/bond_main.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d4635987bda9..29b846cbfb48 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4471,7 +4471,7 @@ int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl) static int bond_check_params(struct bond_params *params) { - int arp_validate_value, fail_over_mac_value, primary_reselect_value; + int arp_validate_value, fail_over_mac_value, primary_reselect_value, i; /* * Convert string parameters. @@ -4651,19 +4651,18 @@ static int bond_check_params(struct bond_params *params) arp_interval = BOND_LINK_ARP_INTERV; } - for (arp_ip_count = 0; - (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count]; - arp_ip_count++) { + for (arp_ip_count = 0, i = 0; + (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) { /* not complete check, but should be good enough to catch mistakes */ - __be32 ip = in_aton(arp_ip_target[arp_ip_count]); - if (!isdigit(arp_ip_target[arp_ip_count][0]) || - ip == 0 || ip == htonl(INADDR_BROADCAST)) { + __be32 ip = in_aton(arp_ip_target[i]); + if (!isdigit(arp_ip_target[i][0]) || ip == 0 || + ip == htonl(INADDR_BROADCAST)) { pr_warning("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", - arp_ip_target[arp_ip_count]); + arp_ip_target[i]); arp_interval = 0; } else { - arp_target[arp_ip_count] = ip; + arp_target[arp_ip_count++] = ip; } } @@ -4697,8 +4696,6 @@ static int bond_check_params(struct bond_params *params) if (miimon) { pr_info("MII link monitoring set to %d ms\n", miimon); } else if (arp_interval) { - int i; - pr_info("ARP monitoring set to %d ms, validate %s, with %d target(s):", arp_interval, arp_validate_tbl[arp_validate_value].modename, -- cgit v1.2.3 From b8fad459f9cc8417b74f71c6c229eef7412163d1 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 12 Jun 2013 00:07:01 +0200 Subject: bonding: reset master mac on first enslave failure If the bond device is supposed to get the first slave's MAC address and the first enslavement fails then we need to reset the master's MAC otherwise it will stay the same as the failed slave device. We do it after err_undo_flags since that is the first place where the MAC can be changed and we check if it should've been the first slave and if the bond's MAC was set to it because that err place is used by multiple locations prior to changing the master's MAC address. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/bonding/bond_main.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 29b846cbfb48..473633ab5f56 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1957,6 +1957,10 @@ err_free: err_undo_flags: bond_compute_features(bond); + /* Enslave of first slave has failed and we need to fix master's mac */ + if (bond->slave_cnt == 0 && + ether_addr_equal(bond_dev->dev_addr, slave_dev->dev_addr)) + eth_hw_addr_random(bond_dev); return res; } -- cgit v1.2.3 From 4f5474e7fd68988cb11373fc698bf10b35b49e31 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 12 Jun 2013 00:07:02 +0200 Subject: bonding: fix igmp_retrans type and two related races First the type of igmp_retrans (which is the actual counter of igmp_resend parameter) is changed to u8 to be able to store values up to 255 (as per documentation). There are two races that were hidden there and which are easy to trigger after the previous fix, the first is between bond_resend_igmp_join_requests and bond_change_active_slave where igmp_retrans is set and can be altered by the periodic. The second race condition is between multiple running instances of the periodic (upon execution it can be scheduled again for immediate execution which can cause the counter to go < 0 which in the unsigned case leads to unnecessary igmp retransmissions). Since in bond_change_active_slave bond->lock is held for reading and curr_slave_lock for writing, we use curr_slave_lock for mutual exclusion. We can't drop them as there're cases where RTNL is not held when bond_change_active_slave is called. RCU is unlocked in bond_resend_igmp_join_requests before getting curr_slave_lock since we don't need it there and it's pointless to delay. The decrement is moved inside the "if" block because if we decrement unconditionally there's still a possibility for a race condition although it is much more difficult to hit (many changes have to happen in a very short period in order to trigger) which in the case of 3 parallel running instances of this function and igmp_retrans == 1 (with check bond->igmp_retrans-- > 1) is: f1 passes, doesn't re-schedule, but decrements - igmp_retrans = 0 f2 then passes, doesn't re-schedule, but decrements - igmp_retrans = 255 f3 does the unnecessary retransmissions. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 15 +++++++++++---- drivers/net/bonding/bonding.h | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/net/bonding/bond_main.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 473633ab5f56..02d9ae7d527e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -764,8 +764,8 @@ static void bond_resend_igmp_join_requests(struct bonding *bond) struct net_device *bond_dev, *vlan_dev, *upper_dev; struct vlan_entry *vlan; - rcu_read_lock(); read_lock(&bond->lock); + rcu_read_lock(); bond_dev = bond->dev; @@ -787,12 +787,19 @@ static void bond_resend_igmp_join_requests(struct bonding *bond) if (vlan_dev) __bond_resend_igmp_join_requests(vlan_dev); } + rcu_read_unlock(); - if (--bond->igmp_retrans > 0) + /* We use curr_slave_lock to protect against concurrent access to + * igmp_retrans from multiple running instances of this function and + * bond_change_active_slave + */ + write_lock_bh(&bond->curr_slave_lock); + if (bond->igmp_retrans > 1) { + bond->igmp_retrans--; queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5); - + } + write_unlock_bh(&bond->curr_slave_lock); read_unlock(&bond->lock); - rcu_read_unlock(); } static void bond_resend_igmp_join_requests_delayed(struct work_struct *work) diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 2baec24388b1..f989e1529a29 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -225,7 +225,7 @@ struct bonding { rwlock_t curr_slave_lock; u8 send_peer_notif; s8 setup_by_slave; - s8 igmp_retrans; + u8 igmp_retrans; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_entry; char proc_file_name[IFNAMSIZ]; -- cgit v1.2.3